Changeset 185


Ignore:
Timestamp:
Jul 11, 2018, 2:33:00 PM (6 years ago)
Author:
chronos
Message:
  • Modified: More robust parsing of acronym meanings.
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Forms/UFormCheck.pas

    r184 r185  
    5353    procedure FindInSummary;
    5454    procedure FindInContent;
     55    function ParseMeaning(Acronym, Text: string; StartIndex: Integer;
     56      out Meaning: string; DashSeparator: Boolean = False): Boolean;
    5557    function IsUppercaseAlpha(Text: string): Boolean;
     58    function IsAlpha(Text: string): Boolean;
    5659    procedure ReportDifferencies;
     60    function WordContainsLetters(Text, Letters: string): Boolean;
    5761  public
    5862    procedure UpdateInterface;
     
    222226procedure TFormCheck.FindInContent;
    223227var
    224   I: Integer;
    225228  Text: string;
    226229  StartIndex: Integer;
    227230  EndIndex: Integer;
    228   StartIndex2: Integer;
    229   StartIndex3: Integer;
    230   StartIndex4: Integer;
    231231  Acronym: string;
    232232  Meaning: string;
     233  Meaning1: string;
     234  Meaning2: string;
     235  HasMeaning1: Boolean;
     236  HasMeaning2: Boolean;
    233237begin
    234238  AcronymDbContent.Acronyms.Clear;
     
    237241  Text := MemoDocument.Lines.Text;
    238242  Text := StringReplace(Text, #9, ' ', [rfReplaceAll]);
     243  Text := StringReplace(Text, LineEnding, ' ', [rfReplaceAll]);
    239244  StartIndex := 1;
    240245  repeat
     
    244249      if EndIndex <> 0 then begin
    245250        Acronym := Trim(Copy(Text, StartIndex + 1, EndIndex - StartIndex - 1));
    246         if (Acronym <> '') and IsUppercaseAlpha(Acronym) then begin
    247           StartIndex2 := StartIndex;
    248           for I := 0 to Length(Acronym) do begin
    249             StartIndex3 := PosFromIndexReverse(' ', Text, StartIndex2);
    250             StartIndex4 := PosFromIndexReverse('-', Text, StartIndex2);
    251             if StartIndex4 > StartIndex3 then StartIndex3 := StartIndex4;
    252             StartIndex4 := PosFromIndexReverse('/', Text, StartIndex2);
    253             if StartIndex4 > StartIndex3 then StartIndex3 := StartIndex4;
    254             StartIndex2 := StartIndex3;
    255             if StartIndex2 = 0 then Break
    256               else Dec(StartIndex2);
     251        if (Length(Acronym) > 1) and IsUppercaseAlpha(Acronym) then begin
     252          HasMeaning1 := ParseMeaning(Acronym, Text, StartIndex - 1, Meaning1);
     253          if HasMeaning1 then Meaning := Meaning1;
     254          HasMeaning2 := ParseMeaning(Acronym, Text, StartIndex - 1, Meaning2, True);
     255          if HasMeaning2 then Meaning := Meaning2;
     256          if HasMeaning1 and HasMeaning2 then begin
     257            if Length(Meaning1) > Length(Meaning2) then Meaning := Meaning1
     258              else Meaning := Meaning2;
    257259          end;
    258           Meaning := Trim(Copy(Text, StartIndex2 + 1, StartIndex - StartIndex2 - 1));
    259           if Assigned(AcronymDbContent.SearchAcronym(Acronym, Meaning)) then
    260             MemoReport.Lines.Add(Format(SDuplicateAcronymContent, [Acronym, Meaning]))
    261             else AcronymDbContent.AddAcronym(Acronym, Meaning);
     260          if HasMeaning1 or HasMeaning2 then begin
     261            if Assigned(AcronymDbContent.SearchAcronym(Acronym, Meaning)) then
     262              MemoReport.Lines.Add(Format(SDuplicateAcronymContent, [Acronym, Meaning]))
     263              else AcronymDbContent.AddAcronym(Acronym, Meaning);
     264          end;
    262265        end;
    263266      end;
     
    265268    end;
    266269  until StartIndex = 0;
     270end;
     271
     272function TFormCheck.ParseMeaning(Acronym, Text: string; StartIndex: Integer;
     273  out Meaning: string; DashSeparator: Boolean): Boolean;
        // Tries to recover the spelled-out meaning of Acronym from the words that
        // precede position StartIndex in Text. The text is walked backwards one
        // word at a time and each word's first letter is matched against the
        // acronym letters that are still unconsumed, working from the last letter
        // of the acronym towards the first.
        //
        // Acronym       - acronym whose meaning is searched for.
        // Text          - text to scan.
        // StartIndex    - index in Text where the backward scan starts.
        // Meaning       - receives the trimmed text between the final scan position
        //                 and StartIndex; it is '' whenever False is returned.
        // DashSeparator - when True, '-' splits words in addition to ' '.
        //
        // Returns False when a word equal to '$' is met, when a matching word does
        // not contain the required acronym letters, when two non-matching words
        // occur in a row, or when more than 2 * Length(Acronym) words were scanned.
        // Returns True in every other case.
        // NOTE(review): Result also stays True when the loop ends because no
        // separator was found (StartIndex3 = 0), even if some acronym letters are
        // still unmatched - confirm this is intended.
        // NOTE(review): PosFromIndexReverse is defined elsewhere; presumably it
        // returns the position of the last occurrence at or before the given index
        // and 0 when there is none - confirm.
     274var
     275  StartIndex2: Integer; // Current scan position (end of the word being examined)
     276  StartIndex3: Integer; // Position of the separator preceding the current word
     277  StartIndex4: Integer; // Position of the preceding '-' (DashSeparator only)
     278  LetterIndex: Integer; // Index of the last acronym letter not yet consumed
     279  OneWord: string;
     280  WordLetterIndex: Integer;
     281  WordCount: Integer; // Number of non-blank words fully processed
     282  WordCountWrong: Integer; // Consecutive words that matched no acronym letter
     283begin
     284  Result := True;
     285  Meaning := '';
     286  StartIndex2 := StartIndex;
     287  LetterIndex := Length(Acronym);
     288  WordCount := 0;
     289  WordCountWrong := 0;
          // Acronym is never modified inside the loop, so this condition is constant;
          // the loop is left only through Break or Exit.
     290  while Length(Acronym) > 0 do begin
            // Use whichever separator result is larger (space, or dash when enabled).
     291    StartIndex3 := PosFromIndexReverse(' ', Text, StartIndex2);
     292    if DashSeparator then begin
     293      StartIndex4 := PosFromIndexReverse('-', Text, StartIndex2);
     294      if StartIndex4 > StartIndex3 then StartIndex3 := StartIndex4;
     295    end;
     296
            // No separator found: stop scanning and use what was collected so far.
     297    if StartIndex3 = 0 then Break;
            // The word spans from just after the separator up to and including StartIndex2.
     298    OneWord := Copy(Text, StartIndex3 + 1, StartIndex2 - StartIndex3);
     299    if OneWord = '$' then begin
     300      // Avoid parsing Bash variables
     301      Result := False;
     302      Exit;
     303    end;
            // Blank word (e.g. consecutive separators): skip it without counting it.
     304    if Trim(OneWord) = '' then begin
     305      StartIndex2 := StartIndex3 - 1;
     306      Continue;
     307    end;
     308    // Is the first character an ASCII letter (upper or lower case)?
     309    if (Length(OneWord) > 0) and IsAlpha(OneWord[1]) then begin
              // Case-insensitive lookup of the word's first letter among the acronym
              // letters 1..LetterIndex, searching backwards from LetterIndex.
     310      WordLetterIndex := PosFromIndexReverse(LowerCase(OneWord[1]), LowerCase(Copy(Acronym, 1, LetterIndex)), LetterIndex);
     311      if WordLetterIndex > 0 then begin
     312        // First letter was found in acronym
                // NOTE(review): the lookup above only covers letters 1..LetterIndex, so
                // this test looks always true and the else branch unreachable - confirm.
     313        if WordLetterIndex <= LetterIndex then begin
                  // The acronym letters WordLetterIndex..LetterIndex must all be found
                  // by WordContainsLetters in this single word, otherwise reject.
     314          if not WordContainsLetters(LowerCase(OneWord), LowerCase(Copy(Acronym, WordLetterIndex, LetterIndex - WordLetterIndex + 1))) then begin
     315            Result := False;
     316            Exit;
     317          end;
                  // Those letters are consumed; continue with the ones before them.
     318          LetterIndex := WordLetterIndex - 1;
     319        end else begin
     320          Dec(LetterIndex);
     321        end;
                // A match resets the run of non-matching words.
     322        WordCountWrong := 0;
     323      end else begin
                // Word starts with a letter that is not among the remaining acronym
                // letters: one such word is tolerated, a second one in a row rejects.
     324        Inc(WordCountWrong);
     325        if WordCountWrong > 1 then begin
     326          Result := False;
     327          Exit;
     328        end;
     329      end;
     330    end else begin
              // Word does not start with a letter: counted the same way as a
              // non-matching word.
     331      Inc(WordCountWrong);
     332      if WordCountWrong > 1 then begin
     333        Result := False;
     334        Exit;
     335      end;
     336    end;
            // Move the scan position to the character just before the separator.
     337    StartIndex2 := StartIndex3 - 1;
            // All acronym letters consumed: the meaning is complete.
     338    if LetterIndex < 1 then Break;
     339    Inc(WordCount);
     340    if WordCount > 2 * Length(Acronym) then begin
     341      // Too many words scanned for an acronym of this length: treat as a false match
     342      Result := False;
     343      Exit;
     344    end;
     345  end;
          // Copies characters StartIndex2 + 1 .. StartIndex - 1 and trims them.
          // NOTE(review): the character at StartIndex itself is excluded here, while
          // the first OneWord above does include it - confirm this is intended.
     346  Meaning := Trim(Copy(Text, StartIndex2 + 1, StartIndex - StartIndex2 - 1));
    267347end;
    268348
     
    282362end;
    283363
     364function TFormCheck.IsAlpha(Text: string): Boolean;
        // Returns True when every character of Text is in 'A'..'Z' or 'a'..'z'.
        // An empty Text also yields True, because Result starts as True and the
        // loop body never runs. Any other character (digits, punctuation,
        // whitespace, non-ASCII bytes) yields False.
     365var
     366  I: Integer;
     367begin
     368  I := 1;
     369  Result := True;
     370  while (I <= Length(Text)) do begin
            // Stop at the first character that is neither an upper-case nor a
            // lower-case ASCII letter.
     371    if not (Text[I] in ['A'..'Z']) and not (Text[I] in ['a'..'z']) then begin
     372      Result := False;
     373      Break;
     374    end;
     375    Inc(I);
     376  end;
     377end;
     378
    284379procedure TFormCheck.ReportDifferencies;
    285380var
     
    304399      if not Assigned(AcronymDbContent.SearchAcronym(Acronym.Name, Meaning.Name, [sfCaseInsensitive])) then
    305400        MemoReport.Lines.Add(Format(SMissingAcronymSummary, [Acronym.Name, Meaning.Name]));
     401    end;
     402  end;
     403end;
     404
     405function TFormCheck.WordContainsLetters(Text, Letters: string): Boolean;
     406var
     407  I: Integer;
     408  LetterIndex: Integer;
     409begin
     410  Result := True;
     411  for I := 1 to Length(Letters) do begin
     412    LetterIndex := Pos(Letters[I], Text);
     413    if LetterIndex > 0 then begin
     414      Text := Copy(Text, LetterIndex + 1, Length(Text));
     415    end else begin
     416      Result := False;
     417      Break;
    306418    end;
    307419  end;
Note: See TracChangeset for help on using the changeset viewer.