Changeset 27 for trunk/UAcronym.pas


Ignore:
Timestamp:
Jun 7, 2016, 3:56:46 PM (8 years ago)
Author:
chronos
Message:
  • Modified: Now import format supports multiple more generic rules for parsing source content.
  • Added: Show total and filtered number of acronyms in acronyms list.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/UAcronym.pas

    r25 r27  
    109109  end;
    110110
    111   TImportPattern = record
     111  TImportPatternFlag = (ipfNone, ipfNewItem, ipfSkip);
     112  TImportVariable = (ivNone, ivAcronym, ivMeaning, ivDescription);
     113
     114  { TImportPattern }
     115
     116  TImportPattern = class
    112117    StartString: string;
    113118    EndString: string;
    114     NewItem: Boolean;
     119    Variable: TImportVariable;
     120    Flag: TImportPatternFlag;
     121    procedure Assign(Source: TImportPattern);
     122    procedure SaveToNode(Node: TDOMNode);
     123    procedure LoadFromNode(Node: TDOMNode);
     124  end;
     125
     126  { TImportPatterns }
     127
     128  TImportPatterns = class(TObjectList)
     129    procedure SaveToNode(Node: TDOMNode);
     130    procedure LoadFromNode(Node: TDOMNode);
    115131  end;
    116132
     
    121137    Name: string;
    122138    Block: TImportPattern;
    123     Acronym: TImportPattern;
    124     Meaning: TImportPattern;
    125     Description: TImportPattern;
     139    ItemPatterns: TImportPatterns;
    126140    Formats: TImportFormats;
    127141    procedure Assign(Source: TImportFormat);
    128142    procedure SaveToNode(Node: TDOMNode);
    129143    procedure LoadFromNode(Node: TDOMNode);
     144    constructor Create;
     145    destructor Destroy; override;
    130146  end;
    131147
     
    143159
    144160  TImportSource = class
     161  private
     162    procedure DoPassword(Sender: TObject; var RepeatRequest : Boolean);
     163  public
    145164    Name: string;
    146165    URL: string;
     
    148167    LastTime: TDateTime;
    149168    Sources: TImportSources;
     169    function DownloadHTTP(URL: string; Stream: TStream): Boolean;
    150170    procedure Process;
    151171    procedure Assign(Source: TImportSource);
     
    189209function AcronymComparer(Item1, Item2: Pointer): Integer;
    190210
     211const
     212  ImportVariableString: array [TImportVariable] of string = ('None', 'Acronym',
     213    'Meaning', 'Description');
     214  ImportPatternFlagString: array [TImportPatternFlag] of string = ('None', 'New item',
     215    'Skip');
     216
    191217
    192218implementation
     
    195221  SWrongFileFormat = 'Wrong file format';
    196222
     223
    197224function AcronymComparer(Item1, Item2: Pointer): Integer;
    198225begin
     
    200227end;
    201228
    202 function DownloadHTTP(URL: string; Stream: TStream): Boolean;
     229{ TImportPatterns }
     230
     231procedure TImportPatterns.SaveToNode(Node: TDOMNode);
     232var
     233  I: Integer;
     234  NewNode2: TDOMNode;
     235begin
     236  for I := 0 to Count - 1 do
     237  with TImportPattern(Items[I]) do begin
     238    NewNode2 := Node.OwnerDocument.CreateElement('Pattern');
     239    Node.AppendChild(NewNode2);
     240    SaveToNode(NewNode2);
     241  end;
     242end;
     243
     244procedure TImportPatterns.LoadFromNode(Node: TDOMNode);
     245var
     246  Node2: TDOMNode;
     247  NewItem: TImportPattern;
     248begin
     249  Count := 0;
     250  Node2 := Node.FirstChild;
     251  while Assigned(Node2) and (Node2.NodeName = 'Pattern') do begin
     252    NewItem := TImportPattern.Create;
     253    NewItem.LoadFromNode(Node2);
     254    Add(NewItem);
     255    Node2 := Node2.NextSibling;
     256  end;
     257end;
     258
     259{ TImportPattern }
     260
     261procedure TImportPattern.Assign(Source: TImportPattern);
     262begin
     263  StartString := Source.StartString;
     264  EndString := Source.EndString;
     265  Variable := Source.Variable;
     266  Flag := Source.Flag;
     267end;
     268
     269procedure TImportPattern.SaveToNode(Node: TDOMNode);
     270begin
     271  WriteString(Node, 'StartString', StartString);
     272  WriteString(Node, 'EndString', EndString);
     273  WriteInteger(Node, 'Variable', Integer(Variable));
     274  WriteInteger(Node, 'Flag', Integer(Flag));
     275end;
     276
     277procedure TImportPattern.LoadFromNode(Node: TDOMNode);
     278begin
     279  StartString := ReadString(Node, 'StartString', '');
     280  EndString := ReadString(Node, 'EndString', '');
     281  Variable := TImportVariable(ReadInteger(Node, 'Variable', 0));
     282  Flag := TImportPatternFlag(ReadInteger(Node, 'Flag', 0));
     283end;
     284
     285procedure TImportSource.DoPassword(Sender: TObject; var RepeatRequest: Boolean);
     286begin
     287  if TFPHttpClient(Sender).Password = '' then begin
     288    TFPHttpClient(Sender).UserName := 'test';
     289    TFPHttpClient(Sender).Password := 'test';
     290    RepeatRequest := True;
     291  end else RepeatRequest := False;
     292end;
     293
     294function TImportSource.DownloadHTTP(URL: string; Stream: TStream): Boolean;
    203295var
    204296  HTTPClient: TFPHTTPClient;
    205297begin
    206298  HTTPClient := TFPHttpClient.Create(nil);
     299  HTTPClient.OnPassword := DoPassword;
    207300  HTTPClient.Get(URL, Stream);
    208301  HTTPClient.Free;
     
    232325
    233326procedure TImportFormat.Assign(Source: TImportFormat);
     327var
     328  I: Integer;
    234329begin
    235330  Name := Source.Name;
    236331  Block.StartString := Source.Block.StartString;
    237332  Block.EndString := Source.Block.EndString;
    238   Acronym.StartString := Source.Acronym.StartString;
    239   Acronym.EndString := Source.Acronym.EndString;
    240   Meaning.StartString := Source.Meaning.StartString;
    241   Meaning.EndString := Source.Meaning.EndString;
    242   Description.StartString := Source.Description.StartString;
    243   Description.EndString := Source.Description.EndString;
     333  while ItemPatterns.Count < Source.ItemPatterns.Count do
     334    ItemPatterns.Add(TImportPattern.Create);
     335  if ItemPatterns.Count > Source.ItemPatterns.Count then
     336    ItemPatterns.Count := Source.ItemPatterns.Count;
     337  for I := 0 to ItemPatterns.Count - 1 do begin
     338    TImportPattern(ItemPatterns[I]).Assign(TImportPattern(Source.ItemPatterns[I]));
     339  end;
    244340end;
    245341
    246342procedure TImportFormat.SaveToNode(Node: TDOMNode);
     343var
     344  NewNode: TDOMNode;
    247345begin
    248346  WriteInteger(Node, 'Id', Id);
     
    250348  WriteString(Node, 'BlockStartString', Block.StartString);
    251349  WriteString(Node, 'BlockEndString', Block.EndString);
    252   WriteString(Node, 'AcronymStartString', Acronym.StartString);
    253   WriteString(Node, 'AcronymEndString', Acronym.EndString);
    254   WriteString(Node, 'MeaningStartString', Meaning.StartString);
    255   WriteString(Node, 'MeaningEndString', Meaning.EndString);
    256   WriteString(Node, 'DescriptionStartString', Description.StartString);
    257   WriteString(Node, 'DescriptionEndString', Description.EndString);
     350
     351  NewNode := Node.OwnerDocument.CreateElement('Patterns');
     352  Node.AppendChild(NewNode);
     353  ItemPatterns.SaveToNode(NewNode);
    258354end;
    259355
    260356procedure TImportFormat.LoadFromNode(Node: TDOMNode);
     357var
     358  NewNode: TDOMNode;
    261359begin
    262360  Id := ReadInteger(Node, 'Id', 0);
     
    264362  Block.StartString := ReadString(Node, 'BlockStartString', '');
    265363  Block.EndString := ReadString(Node, 'BlockEndString', '');
    266   Acronym.StartString := ReadString(Node, 'AcronymStartString', '');
    267   Acronym.EndString := ReadString(Node, 'AcronymEndString', '');
    268   Meaning.StartString := ReadString(Node, 'MeaningStartString', '');
    269   Meaning.EndString := ReadString(Node, 'MeaningEndString', '');
    270   Description.StartString := ReadString(Node, 'DescriptionStartString', '');
    271   Description.EndString := ReadString(Node, 'DescriptionEndString', '');
     364
     365  NewNode := Node.FindNode('Patterns');
     366  if Assigned(NewNode) then
     367    ItemPatterns.LoadFromNode(NewNode);
     368end;
     369
     370constructor TImportFormat.Create;
     371begin
     372  Block := TImportPattern.Create;
     373  ItemPatterns := TImportPatterns.Create;
     374end;
     375
     376destructor TImportFormat.Destroy;
     377begin
     378  Block.Free;
     379  ItemPatterns.Free;
     380  inherited Destroy;
    272381end;
    273382
     
    390499  Stream: TMemoryStream;
    391500  S: string;
     501  SS: string;
    392502  NewAcronym: TAcronymEntry;
    393503  P: Integer;
    394   NewName: string;
     504  Q: Integer;
     505  I: Integer;
     506  T: string;
     507  LastLength: Integer;
    395508begin
    396509  Stream := TMemoryStream.Create;
     
    401514      SetLength(S, Stream.Size);
    402515      Stream.Read(S[1], Length(S));
     516
     517      // Find main block
    403518      if Format.Block.StartString <> '' then begin
    404519        P := Pos(Format.Block.StartString, S);
     
    412527      end;
    413528
     529      // Remove unneeded items
    414530      repeat
    415         P := Pos(Format.Acronym.StartString, S);
    416         if P > 0 then begin
    417           // Acronym
    418           Delete(S, 1, P + Length(Format.Acronym.StartString) - 1);
    419           P := Pos(Format.Acronym.EndString, S);
    420           NewName := Trim(StripHTML(Copy(S, 1, P - 1)));
    421           if NewName <> '' then NewAcronym.Name := NewName;
    422           Delete(S, 1, P + Length(Format.Acronym.EndString) - 1);
    423 
    424           // Meaning
    425           if Length(Format.Meaning.StartString) > 0 then begin
    426             P := Pos(Format.Meaning.StartString, S);
    427             Delete(S, 1, P + Length(Format.Meaning.StartString) - 1);
     531        LastLength := Length(S);
     532        for I := 0 to Format.ItemPatterns.Count - 1 do
     533        with TImportPattern(Format.ItemPatterns[I]) do
     534        if Flag = ipfSkip then begin
     535          P := Pos(StartString, S);
     536          if P > 0 then begin
     537            SS := Copy(S, P + Length(StartString), Length(S));
     538            Q := Pos(EndString, SS);
     539            if Q > 0 then begin
     540              Delete(S, P, Q + Length(EndString) + Length(StartString) - 1);
     541            end;
    428542          end;
    429           P := Pos(Format.Meaning.EndString, S);
    430           NewAcronym.Meaning := Trim(StripHTML(Copy(S, 1, P - 1)));
    431           Delete(S, 1, P + Length(Format.Meaning.EndString) - 1);
    432           Sources.AcronymDb.AddAcronym(NewAcronym.Name, NewAcronym.Meaning);
    433         end else Break;
    434       until False;
     543        end;
     544      until Length(S) = LastLength;
     545
     546      // Find items
     547      repeat
     548        LastLength := Length(S);
     549        for I := 0 to Format.ItemPatterns.Count - 1 do
     550        with TImportPattern(Format.ItemPatterns[I]) do
     551        if Flag <> ipfSkip then begin
     552          if Length(StartString) > 0 then begin
     553            P := Pos(StartString, S);
     554            if P > 0 then Delete(S, 1, P + Length(StartString) - 1);
     555          end;
     556
     557          if ((Length(StartString) > 0) and (P > 0)) or (Length(StartString) = 0) then begin
     558            P := Pos(EndString, S);
     559            T := StripHTML(Copy(S, 1, P - 1));
     560            T := StringReplace(T, '&quot;', '"', [rfReplaceAll]);
     561            T := StringReplace(T, '&trade;', 'TM', [rfReplaceAll]);
     562            T := StringReplace(T, '&amp;', '&', [rfReplaceAll]);
     563            T := Trim(T);
     564            case Variable of
     565              ivAcronym: NewAcronym.Name := T;
     566              ivMeaning: NewAcronym.Meaning := T;
     567            end;
     568            Delete(S, 1, P + Length(EndString) - 1);
     569
     570            if (Flag = ipfNewItem) and (Trim(NewAcronym.Name) <> '') and
     571              (Trim(NewAcronym.Meaning) <> '') then
     572              Sources.AcronymDb.AddAcronym(NewAcronym.Name, NewAcronym.Meaning);
     573          end;
     574        end;
     575      until Length(S) = LastLength;
    435576    end;
    436577  finally
Note: See TracChangeset for help on using the changeset viewer.