source: trunk/Modules/Meet/Meet.php@ 57

Last change on this file since 57 was 57, checked in by chronos, 5 years ago
  • Added: Allow disabling a MeetSource in the database.
  • Added: Show how many meet items were parsed and how many new ones were added.
File size: 13.7 KB
Line 
1<?php
2
3include_once(dirname(__FILE__).'/Import/Seznamka.php');
4include_once(dirname(__FILE__).'/Import/TanecniSkola.php');
5include_once(dirname(__FILE__).'/Import/AstraPraha.php');
6include_once(dirname(__FILE__).'/Import/Vavruska.php');
7include_once(dirname(__FILE__).'/Import/SalsaDance.php');
8include_once(dirname(__FILE__).'/Import/Amblar.php');
9include_once(dirname(__FILE__).'/Import/MajkluvSvet.php');
10include_once(dirname(__FILE__).'/Import/Csts.php');
11include_once(dirname(__FILE__).'/Import/Facebook.php');
12include_once(dirname(__FILE__).'/Import/Eso.php');
13include_once(dirname(__FILE__).'/Import/Ella.php');
14
/**
 * Numeric gender codes.
 *
 * The class is abstract and holds only constants, so it acts as a plain
 * container for the three values below.
 */
abstract class Gender
{
  /** Gender is not known. */
  const Undefined = 0;
  /** Male. */
  const Male = 1;
  /** Female. */
  const Female = 2;
}
21
/**
 * Returns the text between the first $Start marker and the next $End marker
 * and consumes the processed part of $Text.
 *
 * $Text is taken by reference so that repeated calls walk through a document:
 * after a successful extraction it holds only what follows the $End marker.
 * When either marker is missing entirely, $Text is left untouched. When $End
 * occurs only before $Start, $Text is advanced past $Start (so callers that
 * loop still make progress) and nothing else is removed from it.
 *
 * @param string $Text  Text to search; modified in place.
 * @param string $Start Opening marker.
 * @param string $End   Closing marker.
 * @return string Text between the markers, or '' when no pair was found.
 */
function GetTextBetween(&$Text, $Start, $End)
{
  $StartPos = strpos($Text, $Start);
  if (($StartPos === false) or (strpos($Text, $End) === false)) return '';

  // Drop everything up to and including the opening marker.
  $Text = substr($Text, $StartPos + strlen($Start));

  // The closing marker must be looked up again in the shortened text: it may
  // have existed only in the part that was just cut off.
  $EndPos = strpos($Text, $End);
  if ($EndPos === false) return '';

  $Result = substr($Text, 0, $EndPos);
  $Text = substr($Text, $EndPos + strlen($End));
  return $Result;
}
33
/**
 * Converts a "day.month.year [hour[:minute[:second]]]" string into a Unix
 * timestamp using mktime().
 *
 * A space after a dot is removed first, so "1. 2. 2020" is read the same as
 * "1.2.2020". Missing minute and second default to 0; a missing time part
 * means midnight.
 *
 * @param string $DateTime Date with optional time, separated by one space.
 * @return int|null Timestamp, or NULL when the input is empty.
 */
function HumanDateTimeToTime($DateTime)
{
  if ($DateTime == '') return NULL;

  // After this the only remaining space separates the date from the time.
  $Chunks = explode(' ', str_replace('. ', '.', $DateTime));
  $Date = explode('.', $Chunks[0]);

  if (count($Chunks) > 1)
  {
    // Fill in the minute and/or second that were left out.
    $Time = array_pad(explode(':', $Chunks[1]), 3, '0');
  }
  else
  {
    $Time = array(0, 0, 0);
  }

  return mktime($Time[0], $Time[1], $Time[2], $Date[1], $Date[0], $Date[2]);
}
48
/**
 * Converts a "day.month.year" string into a Unix timestamp at midnight.
 *
 * @param string $Date Date without a time part.
 * @return int|null Timestamp, or NULL when the input is empty.
 */
function HumanDateToTime($Date)
{
  // Delegate to the date-time parser with an explicit zero time.
  return ($Date == '') ? NULL : HumanDateTimeToTime($Date.' 0:0:0');
}
54
/**
 * Decodes HTML entities to UTF-8, including numeric entities that
 * html_entity_decode() leaves in place.
 *
 * After the standard decoding pass, every remaining "&#" sequence is handled
 * manually: decimal ("&#39;") or hexadecimal ("&#x27;") when a ';' follows,
 * and decimal digits only when no ';' exists in the rest of the string.
 *
 * NOTE(review): when an unterminated entity is followed by a ';' further on
 * in the text, everything up to that ';' is treated as part of the entity.
 * That behaviour is kept unchanged here.
 *
 * @param string $str Text containing HTML entities.
 * @return string Decoded UTF-8 text.
 */
function DecodeHtmlEnt($str)
{
  $prefix = '&#';
  $suffix = ';';
  $hexchar = 'x';
  $prefixLen = strlen($prefix);
  $hexLen = strlen($hexchar);

  $ret = html_entity_decode($str, ENT_COMPAT, 'UTF-8');
  $p2 = 0;
  for (;;)
  {
    $p = strpos($ret, $prefix, $p2);
    if ($p === FALSE)
      break;

    $p2 = strpos($ret, $suffix, $p);
    if ($p2 === FALSE)
    {
      // No terminator anywhere: accept the run of digits after the prefix.
      $p2 = $p + $prefixLen;
      while (($p2 < strlen($ret)) and is_numeric($ret[$p2]))
        $p2++;
      if ($p2 <= ($p + $prefixLen)) break;
      $add = 0;
    } else $add = 1; // The ';' is part of the entity and is replaced as well

    if (substr($ret, $p + $prefixLen, $hexLen) == $hexchar)
      $char = hexdec(substr($ret, $p + $prefixLen + $hexLen, $p2 - $p - $prefixLen - $hexLen));
    else
      $char = intval(substr($ret, $p + $prefixLen, $p2 - $p - $prefixLen));

    // Build the code point as 4 big-endian bytes and let iconv encode it.
    $newchar = iconv(
      'UCS-4', 'UTF-8',
      chr(($char >> 24) & 0xFF).chr(($char >> 16) & 0xFF).chr(($char >> 8) & 0xFF).chr($char & 0xFF)
    );
    $ret = substr_replace($ret, $newchar, $p, $add + $p2 - $p);

    // Continue right behind the inserted character. The terminator is already
    // gone, so adding $add here would skip one byte: an entity that follows
    // immediately would be missed and the offset could point past the end of
    // the string.
    $p2 = $p + strlen($newchar);
  }
  return $ret;
}
91
/**
 * Removes all HTML comments ("<!-- ... -->") from the given markup.
 *
 * Text before, between and after comments is kept. A comment that is never
 * closed swallows the rest of the input.
 *
 * @param string $Content HTML markup.
 * @return string Markup without comments.
 */
function RemoveHtmlComments($Content)
{
  $Result = '';
  for (;;)
  {
    $Open = strpos($Content, '<!--');
    // No further comment: keep whatever text is left.
    if ($Open === false) return $Result.$Content;

    $Result .= substr($Content, 0, $Open);

    $Close = strpos($Content, '-->', $Open + 4);
    // Unterminated comment: everything after the opener belongs to it.
    if ($Close === false) return $Result;

    $Content = substr($Content, $Close + 3);
  }
}
104
/**
 * Tells whether $Char lies in the range a-z or A-Z.
 *
 * @param string $Char Character to test.
 * @return bool
 */
function is_alpha($Char)
{
  $IsLower = ($Char >= 'a') && ($Char <= 'z');
  $IsUpper = ($Char >= 'A') && ($Char <= 'Z');
  return $IsLower || $IsUpper;
}
109
/**
 * Tells whether $Char is a space or a tab.
 *
 * @param string $Char Character to test.
 * @return bool
 */
function is_white_space($Char)
{
  // Non-strict in_array() keeps the loose == comparison against each value.
  return in_array($Char, array(' ', "\t"));
}
114
/**
 * Finds a number written directly in front of $Needle, e.g. "25" in "25 let".
 *
 * Occurrences of $Needle that are immediately followed by a letter are
 * skipped, as the needle is then only the beginning of a longer word. For the
 * other occurrences the digits and blanks right before the needle are
 * collected and trimmed; the first numeric result wins.
 *
 * @param string $Text   Text to search.
 * @param string $Needle Word expected right after the number.
 * @return string The number as a string; when nothing numeric was found, the
 *                (non-numeric, usually empty) candidate of the last occurrence.
 */
function GetNumberBeforeText($Text, $Needle)
{
  $Result = '';
  $NeedleLength = strlen($Needle);

  while (($Pos = strpos($Text, $Needle)) !== false)
  {
    $After = $Pos + $NeedleLength;
    // Whatever happens below, the next search resumes one byte after $Pos.
    $Remainder = substr($Text, $Pos + 1);

    if (($After < strlen($Text)) and is_alpha($Text[$After]))
    {
      $Text = $Remainder;
      continue;
    }

    $Before = substr($Text, 0, $Pos);
    $Text = $Remainder;

    // Walk left over digits and blanks to find where the number starts.
    $Start = $Pos;
    while (($Start > 0) and (is_numeric($Before[$Start - 1]) or is_white_space($Before[$Start - 1])))
      $Start--;

    $Result = trim(substr($Before, $Start, $Pos - $Start));
    if (is_numeric($Result)) break;
  }
  return $Result;
}
140
/**
 * Finds a number written directly behind $Needle, e.g. "25" in "je mi 25".
 *
 * Occurrences of $Needle that are immediately preceded by a letter are
 * skipped, as the needle is then only the end of a longer word. For the other
 * occurrences the digits and blanks right after the needle are collected and
 * trimmed; the first numeric result wins.
 *
 * @param string $Text   Text to search.
 * @param string $Needle Word expected right before the number.
 * @return string The number as a string; when nothing numeric was found, the
 *                (non-numeric, usually empty) candidate of the last occurrence.
 */
function GetNumberAfterText($Text, $Needle)
{
  $Result = '';
  $NeedleLength = strlen($Needle);

  while (($Pos = strpos($Text, $Needle)) !== false)
  {
    // Whatever happens below, the next search resumes one byte after $Pos.
    $Remainder = substr($Text, $Pos + 1);

    if (($Pos >= 1) and is_alpha($Text[$Pos - 1]))
    {
      $Text = $Remainder;
      continue;
    }

    $Tail = substr($Text, $Pos + $NeedleLength);
    $Text = $Remainder;

    // Count the leading digits and blanks that follow the needle.
    $Length = 0;
    while (($Length < strlen($Tail)) and (is_numeric($Tail[$Length]) or is_white_space($Tail[$Length])))
      $Length++;

    $Result = trim(substr($Tail, 0, $Length));
    if (is_numeric($Result)) break;
  }
  return $Result;
}
167
/**
 * Tries to extract the author's age from free-form (Czech/Slovak) text.
 *
 * The heuristics are tried in a fixed order and the first one that yields a
 * non-empty value wins: explicit age phrases, a number in front of a "years"
 * word, a number after "jsem", the "age/height/weight" slash notation, a
 * birth year, and finally a number after "věk".
 *
 * NOTE(review): strtolower() works on bytes, so accented capitals in $Text
 * may not be folded to the accented needles used below - confirm.
 *
 * @param string $Text Message text.
 * @return string|int Age, or '' when none was recognised.
 */
function GetAgeFromText($Text)
{
  $Text = strtolower($Text);

  // "je mi 25" / "je mi přes 25" (I am / I am over).
  $Result = GetNumberAfterText($Text, 'je mi');
  if ($Result == '') $Result = GetNumberAfterText($Text, 'je mi přes');

  // Number followed by a word meaning "years".
  foreach (array('let', 'rokov', 'roků', 'letou') as $Unit)
  {
    if ($Result != '') break;
    $Result = GetNumberBeforeText($Text, $Unit);
  }

  if ($Result == '') $Result = GetNumberAfterText($Text, 'čerstvých');

  if ($Result == '')
  {
    $Result = GetNumberAfterText($Text, 'jsem');
    if ($Result > 100) $Result = ''; // Age over 100 is probably not age but height
  }

  if ($Result == '') $Result = GetAgeHeightWeightFromText($Text)[0];

  // Birth year: the age is the difference to the current year.
  foreach (array('ročník', 'ročník:', 'narozen roku', 'narozena roku') as $Phrase)
  {
    if ($Result != '') break;
    $Year = GetNumberAfterText($Text, $Phrase);
    if ($Year != '') $Result = date('Y', time()) - $Year;
  }

  if ($Result == '') $Result = GetNumberAfterText($Text, 'věk');
  return $Result;
}
204
/**
 * Tries to extract the author's height from free-form (Czech) text.
 *
 * Heuristics are tried in a fixed order; the first non-empty value wins:
 * a number after a "height" phrase, a number in front of "cm" or
 * "bez podpatků" (without heels), a number after "jsem", and finally the
 * "age/height/weight" slash notation.
 *
 * @param string $Text Message text.
 * @return string Height, or '' when none was recognised.
 */
function GetHeightFromText($Text)
{
  $Text = strtolower($Text);

  $Result = GetNumberAfterText($Text, 'měřím');
  foreach (array('merim', 'výška') as $Phrase)
  {
    if ($Result != '') break;
    $Result = GetNumberAfterText($Text, $Phrase);
  }

  foreach (array('cm', 'bez podpatků') as $Unit)
  {
    if ($Result != '') break;
    $Result = GetNumberBeforeText($Text, $Unit);
  }

  if ($Result == '')
  {
    $Result = GetNumberAfterText($Text, 'jsem');
    if ($Result < 150) $Result = ''; // Height below 150 is probably not height but age
  }

  if ($Result == '') $Result = GetAgeHeightWeightFromText($Text)[1];
  return $Result;
}
220
/**
 * Tries to extract the author's weight from free-form (Czech) text.
 *
 * Tried in order: a number in front of "kg", a number after "vážím"
 * (I weigh) or "váha" (weight), and finally the "age/height/weight" slash
 * notation.
 *
 * @param string $Text Message text.
 * @return string Weight, or '' when none was recognised.
 */
function GetWeightFromText($Text)
{
  $Text = strtolower($Text);

  $Result = GetNumberBeforeText($Text, 'kg');
  foreach (array('vážím', 'váha') as $Phrase)
  {
    if ($Result != '') break;
    $Result = GetNumberAfterText($Text, $Phrase);
  }

  if ($Result == '') $Result = GetAgeHeightWeightFromText($Text)[2];
  return $Result;
}
230
/**
 * Parses the compact "age/height/weight" slash notation from text.
 *
 * Recognised forms (only the first occurrence in the text is used):
 *  - "a/b/c": age/height/weight, unless c is over 150, in which case the
 *    value is taken for a day/month/year date and ignored.
 *  - "a/b": height/weight when a is over 100, otherwise age/height.
 *
 * @param string $Text Message text.
 * @return array Always exactly three elements: age, height, weight. Unknown
 *               values are ''.
 */
function GetAgeHeightWeightFromText($Text)
{
  $Unknown = array('', '', '');

  if (preg_match('/[0-9]+\/[0-9]+\/[0-9]+/', $Text, $Match))
  {
    $Parts = explode('/', $Match[0]);
    // Avoid dates in a form day/month/year
    if ($Parts[2] > 150) return $Unknown;
    return $Parts;
  }

  if (preg_match('/[0-9]+\/[0-9]+/', $Text, $Match))
  {
    list($First, $Second) = explode('/', $Match[0]);
    // If first number is over 100 then its probably height/weight
    if ($First > 100) return array('', $First, $Second);
    return array($First, $Second, '');
  }

  return $Unknown;
}
253
/**
 * Returns the first e-mail address found in the text.
 *
 * @param string $Text Text to search.
 * @return string The address, or '' when the text contains none.
 */
function GetEmailFromText($Text)
{
  $Pattern = '/[a-z0-9_\-\+\.]+@[a-z0-9\-]+\.([a-z]{2,4})(?:\.[a-z]{2})?/i';

  // preg_match() returns 1 only on a real match. Checking its result avoids
  // reading an undefined offset when the text has an '@' but no address.
  if (preg_match($Pattern, $Text, $Match) === 1) return $Match[0];
  return '';
}
266
// Lower-case place-name forms (including inflected ones) mapped to the
// canonical town name. Read by GetLocationFromText().
$Locations = [
  // Praha
  'praha' => 'Praha',
  'prahy' => 'Praha',
  'praze' => 'Praha',
  // Brno
  'brno' => 'Brno',
  'brně' => 'Brno',
  // Ostrava
  'ostrava' => 'Ostrava',
  'ostravě' => 'Ostrava',
  'ostravy' => 'Ostrava',
  'olomouc' => 'Olomouc',
  'liberec' => 'Liberec',
  // Opava
  'opava' => 'Opava',
  'opave' => 'Opava',
  // Plzeň
  'plzeň' => 'Plzeň',
  'plzni' => 'Plzeň',
  'vyškov' => 'Vyškov',
  // Mladá Boleslav
  'mladá boleslav' => 'Mladá Boleslav',
  'mladé boleslavi' => 'Mladá Boleslav',
  'litoměřice' => 'Litoměřice',
  'sokolov' => 'Sokolov',
  'mikulov' => 'Mikulov',
  'havířov' => 'Havířov',
  'kolín' => 'Kolín',
  'kroměříž' => 'Kroměříž',
];
292
/**
 * Returns the canonical name of the first known town mentioned in the text.
 *
 * The text is lower-cased and searched for every key of the global
 * $Locations table, in table order.
 *
 * NOTE(review): strtolower() works on bytes, so accented capitals in $Text
 * may not be folded to the accented keys of the table - confirm.
 *
 * @param string $Text Message text.
 * @return string Town name, or '' when no known town is mentioned.
 */
function GetLocationFromText($Text)
{
  global $Locations;

  $Haystack = strtolower($Text);
  $Found = '';
  foreach ($Locations as $Form => $Town)
  {
    if (strpos($Haystack, $Form) === false) continue;
    $Found = $Town;
    break;
  }
  return $Found;
}
305
/**
 * Guesses the gender from the ending of a (first) name.
 *
 * Names with one of the listed endings are reported as female; everything
 * else, including an empty name, is reported as male. The comparison is
 * case-sensitive.
 *
 * @param string $Text Name.
 * @return int Gender::Female or Gender::Male.
 */
function GetGenderFromName($Text)
{
  $FemaleEndings = array(
    'na', 'ta', 'va', 'ka', 'ga', 'ie', 'la', 'za', 'še', 'ra',
    'da', 'sa', 'ce', 'id', 'ša', 'ma', 'ja', 'ia', 'ha', 'is',
  );

  foreach ($FemaleEndings as $Ending)
  {
    // substr() counts bytes, so the tail is cut using the byte length of each
    // ending. A fixed length of 2 can never equal multibyte endings such as
    // 'še' or 'ša'.
    if (substr($Text, -strlen($Ending)) === $Ending) return Gender::Female;
  }
  return Gender::Male;
}
320
/**
 * Runs the import of all enabled meet sources stored in the database.
 */
class MeetSources
{
  /** Database connection wrapper; must be assigned before Parse() is called. */
  public $Database;
  /** Source objects created by Parse(); each call appends to this list. */
  public $Items = array();

  /**
   * Loads the enabled rows of the MeetSource table, creates the importer that
   * matches each row's Method and runs its import.
   *
   * @param mixed $Id When set to a numeric value, only the source with this
   *                  Id is processed; otherwise all enabled sources are.
   * @return string HTML report collected from all processed sources.
   */
  function Parse($Id = null)
  {
    // Value of the Method column => importer class handling it.
    $Importers = array(
      'hes' => 'MeetSourceTanecniSkola',
      'vavruska' => 'MeetSourceVavruska',
      'salsadance' => 'MeetSourceSalsaDance',
      'astra' => 'MeetSourceAstraPraha',
      'seznamka' => 'MeetSourceSeznamka',
      'amblar' => 'MeetSourceAmblar',
      'majkluvsvet' => 'MeetSourceMajkluvSvet',
      'csts' => 'MeetSourceCsts',
      'facebook' => 'MeetSourceFacebook',
      'eso' => 'MeetSourceEso',
      'ella' => 'MeetSourceElla',
    );

    $Output = '';
    $Where = '(Enabled=1)';
    // Only a numeric Id is ever placed into the SQL condition.
    if (($Id != null) and is_numeric($Id)) $Where .= ' AND (Id='.$Id.')';
    $DbResult = $this->Database->select('MeetSource', '*', $Where);
    while ($DbRow = $DbResult->fetch_assoc())
    {
      $Method = $DbRow['Method'];
      if (!is_string($Method) or !isset($Importers[$Method]))
      {
        $Output .= 'Unsupported parse method: '.$Method.'<br/>';
        continue;
      }

      $ClassName = $Importers[$Method];
      $Source = new $ClassName();
      $Source->Database = $this->Database;
      $Source->Id = $DbRow['Id'];
      $Source->URL = $DbRow['URL'];
      $Source->Method = $Method;
      $Source->Name = $DbRow['Name'];
      $this->Items[] = $Source;
      $Output .= $Source->DoImport();
    }
    return $Output;
  }
}
360
/**
 * Base class of a single meet source importer.
 *
 * Subclasses override Import() to fill $MeetItems; DoImport() then stores the
 * items that are not known yet and reports the counts.
 */
class MeetSource
{
  /** Source name shown in the report. */
  public $Name;
  /** Source address; not used by this base class. */
  public $URL;
  /** Parse method identifier; not used by this base class. */
  public $Method;
  /** Source identifier shown in the report. */
  public $Id;
  /** Database connection wrapper; not used by this base class. */
  public $Database;
  /** Items collected by Import(). */
  public $MeetItems;
  /** Passed to MeetItem::AddIfNotExist() as $CompareTime. */
  public $AddCompareTime;
  /** Passed to MeetItem::AddIfNotExist() as $CompareRemoteId. */
  public $AddCompareRemoteId;
  /** Passed to MeetItem::AddIfNotExist() as $TimeInterval. */
  public $AddTimeInterval;
  /** Number of items newly stored by the last DoImport(). */
  public $AddedCount;

  function __construct()
  {
    $this->MeetItems = array();
    $this->AddedCount = 0;
    $this->AddCompareTime = true;
    $this->AddCompareRemoteId = false;
    $this->AddTimeInterval = 0;
  }

  /**
   * Collects items into $MeetItems. The base implementation collects nothing.
   *
   * @return string Text appended to the import report.
   */
  function Import()
  {
    return '';
  }

  /**
   * Runs Import() and stores every collected item that does not exist yet.
   *
   * @return string One report line (HTML) with the parsed and added counts.
   */
  function DoImport()
  {
    $this->MeetItems = array();
    $this->AddedCount = 0;
    $Output = 'Parsing '.$this->Name.' (#'.$this->Id.')...';
    $Output .= $this->Import();
    $Output .= ' parsed: '.count($this->MeetItems);
    foreach ($this->MeetItems as $MeetItem)
    {
      // AddIfNotExist() returns 1 for a newly stored item and 0 otherwise.
      $this->AddedCount += $MeetItem->AddIfNotExist($this->AddTimeInterval, $this->AddCompareTime, $this->AddCompareRemoteId);
    }
    $Output .= ', new added: '.$this->AddedCount;
    // A line break is a void element: '<br/>', not the malformed '</br>'.
    $Output .= '<br/>'."\n";
    return $Output;
  }
}
404
/**
 * One parsed meet advertisement together with the logic that stores it.
 */
class MeetItem
{
  /** Database connection wrapper; must be assigned before AddIfNotExist(). */
  public $Database;
  public $Name = '';
  public $Message = '';
  /** Used in arithmetic with $TimeInterval - presumably a Unix timestamp; confirm. */
  public $Time = '';
  public $Gender = Gender::Undefined;
  public $Phone = '';
  public $Email = '';
  public $Age = '';
  public $Height = '';
  /** Placed unquoted into the SQL condition, so it has to be numeric. */
  public $Source = 0;
  public $Weight = '';
  public $Location = '';
  /** Not written to the database by AddIfNotExist(). */
  public $Image = '';
  public $Link = '';
  /** Not written to the database by AddIfNotExist(). */
  public $Title = '';
  /** Not written to the database by AddIfNotExist(). */
  public $Level = '';
  public $RemoteId = '';

  /**
   * Inserts this item into the MeetItem table unless a matching row exists.
   *
   * A row matches when Message, Email and Source are equal and, optionally,
   * its Time lies within +/- $TimeInterval of this item's time and/or its
   * RemoteId is equal.
   *
   * @param int  $TimeInterval    Allowed time difference for the time match.
   * @param bool $CompareTime     Whether Time takes part in the match.
   * @param bool $CompareRemoteId Whether RemoteId takes part in the match.
   * @return int 1 when the item was inserted, 0 when it already existed.
   */
  function AddIfNotExist($TimeInterval = 0, $CompareTime = true, $CompareRemoteId = false)
  {
    $Db = $this->Database;

    $Conditions = array(
      '(`Message` = "'.$Db->real_escape_string($this->Message).'")',
      '(`Email` = "'.$Db->real_escape_string($this->Email).'")',
      '(`Source` = '.$this->Source.')',
    );
    if ($CompareTime)
    {
      $Conditions[] = '(`Time` >= "'.$Db->real_escape_string(TimeToMysqlDateTime($this->Time - $TimeInterval)).'")';
      $Conditions[] = '(`Time` <= "'.$Db->real_escape_string(TimeToMysqlDateTime($this->Time + $TimeInterval)).'")';
    }
    if ($CompareRemoteId)
    {
      $Conditions[] = '(`RemoteId` = "'.$Db->real_escape_string($this->RemoteId).'")';
    }

    $DbResult = $Db->select('MeetItem', '*', implode(' AND ', $Conditions));
    if ($DbResult->num_rows != 0) return 0;

    // Unknown numeric attributes are passed as null instead of ''.
    $Db->insert('MeetItem', array(
      'Message' => $this->Message,
      'Time' => TimeToMysqlDateTime($this->Time),
      'Gender' => $this->Gender,
      'Age' => ($this->Age == '') ? null : $this->Age,
      'Email' => $this->Email,
      'Phone' => $this->Phone,
      'Name' => $this->Name,
      'Height' => ($this->Height == '') ? null : $this->Height,
      'Weight' => ($this->Weight == '') ? null : $this->Weight,
      'Location' => $this->Location,
      'Source' => $this->Source,
      'Link' => $this->Link,
      'RemoteId' => $this->RemoteId,
      // NOTE(review): passed as a plain string; presumably the Database
      // wrapper recognises it as an SQL expression - confirm.
      'TimeImport' => 'NOW()',
    ));
    return 1;
  }
}
Note: See TracBrowser for help on using the repository browser.