Changeset 47 for trunk/UAcronym.pas
- Timestamp:
- Jul 15, 2016, 4:40:41 PM (8 years ago)
- Location:
- trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk
- Property svn:ignore
-
old new 4 4 AcronymDecoder 5 5 heaptrclog.trc 6 6 AcronymDecoder.res
-
- Property svn:ignore
-
trunk/UAcronym.pas
r46 r47 138 138 end; 139 139 140 TImportFormatKind = (ifkParse , ifkMSAccess);140 TImportFormatKind = (ifkParseURL, ifkMSAccess, ifkParseFile); 141 141 142 142 { TImportFormat } … … 172 172 ResponseStream: TMemoryStream; 173 173 procedure DoPassword(Sender: TObject; var RepeatRequest : Boolean); 174 procedure TextParse(S: string); 174 175 public 175 176 Enabled: Boolean; … … 185 186 function DownloadHTTP(URL: string; Stream: TStream): Boolean; 186 187 procedure Process; 187 procedure ProcessTextParse; 188 procedure ProcessTextParseURL; 189 procedure ProcessTextParseFile; 188 190 procedure ProcessMSAccess; 189 191 procedure Assign(Source: TImportSource); … … 263 265 end; 264 266 267 function StripHTML(S: string): string; 268 var 269 TagBegin, TagEnd, TagLength: Integer; 270 begin 271 TagBegin := Pos( '<', S); // search position of first < 272 273 while (TagBegin > 0) do begin // while there is a < in S 274 TagEnd := Pos('>', S); // find the matching > 275 if TagEnd = 0 then TagLength := Length(S) - TagBegin 276 else TagLength := TagEnd - TagBegin + 1; 277 if TagLength > 0 then 278 Delete(S, TagBegin, TagLength) // delete the tag 279 else Delete(S, 1, TagEnd); // delete the tag 280 TagBegin := Pos( '<', S); // search for next < 281 end; 282 283 Result := S; // give the result 284 end; 285 265 286 { TImportSourceMSAccess } 266 287 … … 390 411 end; 391 412 413 procedure TImportSource.TextParse(S: string); 414 var 415 SS: string; 416 NewAcronym: TAcronymEntry; 417 P: Integer; 418 P1, P2: Integer; 419 Q: Integer; 420 I: Integer; 421 T: string; 422 LastLength: Integer; 423 AddedAcronym: TAcronymMeaning; 424 begin 425 ItemCount := 0; 426 NewAcronym := TAcronymEntry.Create; 427 try 428 429 // Find main block 430 if Format.Block.StartString <> '' then begin 431 P := Pos(Format.Block.StartString, S); 432 if P > 0 then 433 Delete(S, 1, P + Length(Format.Block.StartString) - 1); 434 end; 435 if Format.Block.EndString <> '' then begin 436 P := Pos(Format.Block.EndString, S); 437 if P > 0 then 438 Delete(S, P, Length(S)); 439 end; 440 441 // Remove unneeded items 442 { repeat 443 LastLength := Length(S); 444 for I := 0 to Format.ItemPatterns.Count - 1 do 445 with TImportPattern(Format.ItemPatterns[I]) do 446 if Flag = ipfSkip then begin 447 P := Pos(StartString, S); 448 if P > 0 then begin 449 SS := Copy(S, P + Length(StartString), Length(S)); 450 Q := Pos(EndString, SS); 451 if Q > 0 then begin 452 Delete(S, P, Q + Length(EndString) + Length(StartString) - 1); 453 end; 454 end; 455 end; 456 until Length(S) = LastLength; 457 } 458 // Find items 459 repeat 460 LastLength := Length(S); 461 I := 0; 462 while I < Format.ItemPatterns.Count do 463 with TImportPattern(Format.ItemPatterns[I]) do begin 464 if Flag <> ipfSkip then begin 465 if Length(StartString) > 0 then begin 466 P := Pos(StartString, S); 467 if P > 0 then Delete(S, 1, P + Length(StartString) - 1); 468 end; 469 470 if ((Length(StartString) > 0) and (P > 0)) or (Length(StartString) = 0) then begin 471 P := Pos(EndString, S); 472 T := StripHTML(Copy(S, 1, P - 1)); 473 T := StringReplace(T, '"', '"', [rfReplaceAll]); 474 T := StringReplace(T, '™', 'TM', [rfReplaceAll]); 475 T := StringReplace(T, '&', '&', [rfReplaceAll]); 476 T := StringReplace(T, ' ', ' ', [rfReplaceAll]); // No break space 477 T := Trim(T); 478 case Variable of 479 ivAcronym: NewAcronym.Name := T; 480 ivMeaning: NewAcronym.Meaning := T; 481 ivDescription: NewAcronym.Description := T; 482 end; 483 Delete(S, 1, P + Length(EndString) - 1); 484 485 if (Flag = ipfNewItem) and (Trim(NewAcronym.Name) <> '') and 486 (Trim(NewAcronym.Meaning) <> '') then begin 487 AddedAcronym := Sources.AcronymDb.AddAcronym(NewAcronym.Name, NewAcronym.Meaning); 488 AddedAcronym.Description := NewAcronym.Description; 489 AddedAcronym.Categories.Assign(Categories, laOr); 490 Inc(ItemCount); 491 end; 492 493 if Repetition then begin 494 if Length(StartString) > 0 then begin 495 P1 := Pos(StartString, S); 496 if P1 > 0 then begin 497 P2 := Pos(TImportPattern(Format.ItemPatterns[(I + 1) mod Format.ItemPatterns.Count]).StartString, S); 498 if (P2 > 0) and (P1 < P2) then Continue; 499 end; 500 end; 501 end; 502 end; 503 end; 504 Inc(I); 505 end; 506 until Length(S) = LastLength; 507 finally 508 NewAcronym.Free; 509 end; 510 end; 511 392 512 393 513 function TImportSource.DownloadHTTP(URL: string; Stream: TStream): Boolean; … … 413 533 begin 414 534 case Format.Kind of 415 ifkParse : ProcessTextParse;535 ifkParseURL: ProcessTextParseURL; 416 536 ifkMSAccess: ProcessMSAccess; 537 ifkParseFile: ProcessTextParseFile; 417 538 else raise Exception.Create(SUnsupportedImportFormat); 418 539 end; 419 540 end; 420 541 421 function StripHTML(S: string): string;422 var423 TagBegin, TagEnd, TagLength: Integer;424 begin425 TagBegin := Pos( '<', S); // search position of first <426 427 while (TagBegin > 0) do begin // while there is a < in S428 TagEnd := Pos('>', S); // find the matching >429 if TagEnd = 0 then TagLength := Length(S) - TagBegin430 else TagLength := TagEnd - TagBegin + 1;431 if TagLength > 0 then432 Delete(S, TagBegin, TagLength) // delete the tag433 else Delete(S, 1, TagEnd); // delete the tag434 TagBegin := Pos( '<', S); // search for next <435 end;436 437 Result := S; // give the result438 end;439 542 440 543 { TImportFormat } … … 614 717 { TImportSource } 615 718 616 procedure TImportSource.ProcessTextParse ;719 procedure TImportSource.ProcessTextParseURL; 617 720 var 618 721 S: string; 619 SS: string; 620 NewAcronym: TAcronymEntry; 621 P: Integer; 622 P1, P2: Integer; 623 Q: Integer; 624 I: Integer; 625 T: string; 626 LastLength: Integer; 627 AddedAcronym: TAcronymMeaning; 628 begin 629 ItemCount := 0; 630 NewAcronym := TAcronymEntry.Create; 631 try 632 if DownloadHTTP(URL, ResponseStream) then begin 633 ResponseStream.Position := 0; 634 SetLength(S, ResponseStream.Size); 635 ResponseStream.Read(S[1], Length(S)); 636 637 // Find main block 638 if Format.Block.StartString <> '' then begin 639 P := Pos(Format.Block.StartString, S); 640 if P > 0 then 641 Delete(S, 1, P + Length(Format.Block.StartString) - 1); 642 end; 643 if Format.Block.EndString <> '' then begin 644 P := Pos(Format.Block.EndString, S); 645 if P > 0 then 646 Delete(S, P, Length(S)); 647 end; 648 649 // Remove unneeded items 650 repeat 651 LastLength := Length(S); 652 for I := 0 to Format.ItemPatterns.Count - 1 do 653 with TImportPattern(Format.ItemPatterns[I]) do 654 if Flag = ipfSkip then begin 655 P := Pos(StartString, S); 656 if P > 0 then begin 657 SS := Copy(S, P + Length(StartString), Length(S)); 658 Q := Pos(EndString, SS); 659 if Q > 0 then begin 660 Delete(S, P, Q + Length(EndString) + Length(StartString) - 1); 661 end; 662 end; 663 end; 664 until Length(S) = LastLength; 665 666 // Find items 667 repeat 668 LastLength := Length(S); 669 I := 0; 670 while I < Format.ItemPatterns.Count do 671 with TImportPattern(Format.ItemPatterns[I]) do begin 672 if Flag <> ipfSkip then begin 673 if Length(StartString) > 0 then begin 674 P := Pos(StartString, S); 675 if P > 0 then Delete(S, 1, P + Length(StartString) - 1); 676 end; 677 678 if ((Length(StartString) > 0) and (P > 0)) or (Length(StartString) = 0) then begin 679 P := Pos(EndString, S); 680 T := StripHTML(Copy(S, 1, P - 1)); 681 T := StringReplace(T, '"', '"', [rfReplaceAll]); 682 T := StringReplace(T, '™', 'TM', [rfReplaceAll]); 683 T := StringReplace(T, '&', '&', [rfReplaceAll]); 684 T := StringReplace(T, ' ', ' ', [rfReplaceAll]); // No break space 685 T := Trim(T); 686 case Variable of 687 ivAcronym: NewAcronym.Name := T; 688 ivMeaning: NewAcronym.Meaning := T; 689 ivDescription: NewAcronym.Description := T; 690 end; 691 Delete(S, 1, P + Length(EndString) - 1); 692 693 if (Flag = ipfNewItem) and (Trim(NewAcronym.Name) <> '') and 694 (Trim(NewAcronym.Meaning) <> '') then begin 695 AddedAcronym := Sources.AcronymDb.AddAcronym(NewAcronym.Name, NewAcronym.Meaning); 696 AddedAcronym.Description := NewAcronym.Description; 697 AddedAcronym.Categories.Assign(Categories, laOr); 698 Inc(ItemCount); 699 end; 700 701 if Repetition then begin 702 if Length(StartString) > 0 then begin 703 P1 := Pos(StartString, S); 704 if P1 > 0 then begin 705 P2 := Pos(TImportPattern(Format.ItemPatterns[(I + 1) mod Format.ItemPatterns.Count]).StartString, S); 706 if (P2 > 0) and (P1 < P2) then Continue; 707 end; 708 end; 709 end; 710 end; 711 end; 712 Inc(I); 713 end; 714 until Length(S) = LastLength; 722 begin 723 if DownloadHTTP(URL, ResponseStream) then begin 724 ResponseStream.Position := 0; 725 SetLength(S, ResponseStream.Size); 726 ResponseStream.Read(S[1], Length(S)); 727 728 TextParse(S); 729 end; 730 end; 731 732 procedure TImportSource.ProcessTextParseFile; 733 var 734 S: TStringList; 735 begin 736 if FileExists(URL) then begin 737 S := TStringList.Create; 738 try 739 S.LoadFromFile(URL); 740 TextParse(S.Text); 741 finally 742 S.Free; 715 743 end; 716 finally 717 NewAcronym.Free; 718 end; 744 end else ShowMessage('File ' + URL + ' not found'); 719 745 end; 720 746
Note:
See TracChangeset
for help on using the changeset viewer.