Changeset 192


Ignore:
Timestamp:
Jul 17, 2018, 2:39:01 PM (6 years ago)
Author:
chronos
Message:
  • Added: Analysis of acronym usage (without parentheses) in document content.
  • Modified: Better reporting of acronyms with multiple meanings and of acronyms whose meaning differs between the document content and the summary.
Location:
trunk
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/Forms/UFormCheck.lfm

    r191 r192  
    168168          ScrollBars = ssAutoBoth
    169169          TabOrder = 0
     170          WordWrap = False
    170171        end
    171172      end
  • trunk/Forms/UFormCheck.pas

    r191 r192  
    5151    procedure FindInSummary;
    5252    procedure FindInContent;
     53    function AllowedSideChar(Before, After: Char): Boolean;
    5354    function ParseMeaning(Acronym, Text: string; StartIndex: Integer;
    5455      out Meaning: string; DashSeparator: Boolean = False): Boolean;
    5556    function IsUppercaseAlpha(Text: string): Boolean;
     57    function IsLowercaseAlpha(Text: string): Boolean;
    5658    function IsAlpha(Text: string): Boolean;
    5759    function IsAcronym(Text: string): Boolean;
     60    function IsDigit(Text: Char): Boolean;
    5861    procedure ReportDifferencies;
    5962    function WordContainsLetters(Text, Letters: string): Boolean;
     
    8285  SMissingAcronymContent = 'Warning: Document body acronym %s with meaning "%s" missing from acronym summary.';
    8386  SMissingAcronymSummary = 'Warning: Summary acronym %s with meaning "%s" missing from document body.';
     87  SAcronymContentMultipleMeanings = 'Warning: Content acronym %s has multiple meanings: %s.';
     88  SAcronymSummaryMultipleMeanings = 'Warning: Summary acronym %s has multiple meanings: %s.';
     89  SAcronymWithDifferentMeaning = 'Warning: Acronym %s has different meaning for content "%s" and for summary "%s".';
     90  SAcronymWithDifferentMeaningCount = 'Warning: Acronym %s has different meaning count for content (%d) and for summary (%d).';
    8491  SPluralAcronym = 'Note: Acronym %s is defined as plural in document body.';
     92  SPluralAcronymUsed = 'Note: Acronym %s is used as plural in document body.';
    8593  SSummaryAcronyms = 'Summary acronyms';
    8694  SContentAcronyms = 'Content acronyms';
     95  SAcronymUsedBeforeDefined = 'Acronym %s used before it was defined.';
     96
     97const
     98  MinAcronymLength = 2;
    8799
    88100{ TFormCheck }
     
    215227var
    216228  Text: string;
    217   StartIndex: Integer;
    218   EndIndex: Integer;
     229  Index: Integer;
     230  State: (stNone, stAcronymUsage, stAcronymDefinition);
    219231  Acronym: string;
     232  AcronymCharBefore: Char;
     233  AcronymCharAfter: Char;
     234  Lines: TStringList;
     235  HasUpperCase: Boolean;
     236  HasLowerCase: Boolean;
     237  I: Integer;
     238  J: Integer;
     239  Line: string;
    220240  Meaning: string;
    221241  Meaning1: string;
     
    224244  HasMeaning2: Boolean;
    225245  Plural: Boolean;
     246  StartIndex: Integer;
     247  Acro: TAcronym;
    226248begin
    227249  AcronymDbContent.Acronyms.Clear;
    228250
     251  // Make lowercase lines where all alpha characters are in uppercase
     252  // These are usually first level chapter titles or first page text
     253  Lines := TStringList.Create;
     254  Lines.Assign(MemoDocument.Lines);
     255  for I := 0 to Lines.Count - 1 do begin
     256    HasLowerCase := False;
     257    HasUpperCase := False;
     258    Line := Lines[I];
     259    for J := 1 to Length(Line) do begin
     260      if IsUppercaseAlpha(Line[J]) then HasUpperCase := True;
     261      if IsLowercaseAlpha(Line[J]) then HasLowerCase := True;
     262    end;
     263    if HasUpperCase and not HasLowerCase then
     264      Lines[I] := LowerCase(Lines[I]);
     265  end;
     266
    229267  // Find acronyms usage in text
    230   Text := MemoDocument.Lines.Text;
     268  Text := Lines.Text;
    231269  Text := StringReplace(Text, #9, ' ', [rfReplaceAll]);
    232270  Text := StringReplace(Text, LineEnding, ' ', [rfReplaceAll]);
    233   StartIndex := 1;
     271  Index := 1;
     272  AcronymCharBefore := ' ';
     273  AcronymCharAfter := ' ';
     274  State := stNone;
     275  StartIndex := 0;
    234276  repeat
    235     StartIndex := PosFromIndex('(', Text, StartIndex);
    236     if StartIndex <> 0 then begin
    237       EndIndex := PosFromIndex(')', Text, StartIndex);
    238       if EndIndex <> 0 then begin
    239         Acronym := Trim(Copy(Text, StartIndex + 1, EndIndex - StartIndex - 1));
     277    if State = stAcronymUsage then begin
     278      if not IsUppercaseAlpha(Text[Index]) then begin
     279        if Text[Index] = 's' then begin
     280          Acronym := Acronym + Text[Index];
     281          Inc(Index);
     282        end;
     283        if (Index + 1) < Length(Text) then AcronymCharAfter := Text[Index + 1]
     284          else AcronymCharAfter := ' ';
     285        State := stNone;
     286
     287        // Allow plural acronyms with ending 's' character
     288        if (Length(Acronym) >= 1) and (Acronym[Length(Acronym)] = 's') then begin
     289          Acronym := Copy(Acronym, 1, Length(Acronym) - 1);
     290          Plural := True;
     291        end else Plural := False;
     292
     293        // Accept only acronyms of at least MinAcronymLength characters that are delimited by allowed characters on both sides
     294        if (Length(Acronym) >= MinAcronymLength) and
     295          AllowedSideChar(AcronymCharBefore, AcronymCharAfter) then begin
     296            // If the acronym was used in plural form then note it in the report
     297            if Plural then MemoReport.Lines.Add(Format(SPluralAcronymUsed, [Acronym]));
     298
     299            Acro := AcronymDbContent.Acronyms.SearchByName(Acronym);
     300            if not Assigned(Acro) then begin
     301              MemoReport.Lines.Add(Format(SAcronymUsedBeforeDefined, [Acronym]));
     302              AcronymDbContent.AddAcronym(Acronym, '');
     303            end;
     304        end;
     305      end else Acronym := Acronym + Text[Index];
     306    end else
     307    if State = stAcronymDefinition then begin
     308      if Text[Index] = ')' then begin
    240309        // Allow plural acronyms with ending 's' character
    241310        if (Length(Acronym) >= 1) and (Acronym[Length(Acronym)] = 's') then begin
     
    262331              else AcronymDbContent.AddAcronym(Acronym, Meaning);
    263332          end;
    264         end;
     333        end else
     334          // No acronym inside parenthesis, continue with parsing inside
     335          Index := StartIndex + 1;
     336        State := stNone;
     337      end else begin
     338        Acronym := Acronym + Text[Index];
    265339      end;
    266       Inc(StartIndex);
    267     end;
    268   until StartIndex = 0;
     340    end else begin
     341      if Text[Index] = '(' then begin
     342        State := stAcronymDefinition;
     343        Acronym := '';
     344        StartIndex := Index;
     345      end else
     346      if IsUppercaseAlpha(Text[Index]) then begin
     347        State := stAcronymUsage;
     348        Acronym := Text[Index];
     349        if (Index - 1) >= 0 then AcronymCharBefore := Text[Index - 1]
     350          else AcronymCharBefore := ' ';
     351      end;
     352    end;
     353    Inc(Index);
     354  until Index > Length(Text);
     355  Lines.Free;
     356end;
     357
     358function TFormCheck.AllowedSideChar(Before, After: Char): Boolean;
     359begin
     360  Result := ((Before = ' ') or (Before = #10) or (Before = #13) or (Before = ',') or
     361    (Before = ';') or (Before = '(') or (Before = ')'))
     362    and ((After = ' ') or (After = #10) or (After = #13) or (After = ',') or
     363    (After = '.') or (After = ';') or (After = '(') or (After = ')'));
     364
    269365end;
    270366
     
    361457end;
    362458
     459function TFormCheck.IsLowercaseAlpha(Text: string): Boolean;
     460var
     461  I: Integer;
     462begin
     463  I := 1;
     464  Result := True;
     465  while (I <= Length(Text)) do begin
     466    if not (Text[I] in ['a'..'z']) then begin
     467      Result := False;
     468      Break;
     469    end;
     470    Inc(I);
     471  end;
     472end;
     473
    363474function TFormCheck.IsAlpha(Text: string): Boolean;
    364475var
     
    383494end;
    384495
     496function TFormCheck.IsDigit(Text: Char): Boolean;
     497begin
     498  Result := Text in ['0'..'9'];
     499end;
     500
    385501procedure TFormCheck.ReportDifferencies;
    386502var
     
    388504  J: Integer;
    389505  Acronym: TAcronym;
     506  Acronym2: TAcronym;
    390507  Meaning: TAcronymMeaning;
    391 begin
     508  Meaning2: TAcronymMeaning;
     509begin
     510  // In content but not in summary
    392511  for I := 0 to AcronymDbContent.Acronyms.Count - 1 do begin
    393512    Acronym := TAcronym(AcronymDbContent.Acronyms[I]);
     513    if Acronym.Meanings.Count > 1 then
     514      MemoReport.Lines.Add(Format(SAcronymContentMultipleMeanings, [Acronym.Name, Acronym.Meanings.GetNames]));
     515    Acronym2 := AcronymDbSummary.Acronyms.SearchByName(Acronym.Name);
     516    if not Assigned(Acronym2) then
    394517    for J := 0 to Acronym.Meanings.Count - 1 do begin
    395518      Meaning := TAcronymMeaning(Acronym.Meanings[J]);
     
    399522  end;
    400523
     524  // In summary but not in content
    401525  for I := 0 to AcronymDbSummary.Acronyms.Count - 1 do begin
    402526    Acronym := TAcronym(AcronymDbSummary.Acronyms[I]);
     527    if Acronym.Meanings.Count > 1 then
     528      MemoReport.Lines.Add(Format(SAcronymSummaryMultipleMeanings, [Acronym.Name, Acronym.Meanings.GetNames]));
     529    Acronym2 := AcronymDbContent.Acronyms.SearchByName(Acronym.Name);
     530    if not Assigned(Acronym2) then
    403531    for J := 0 to Acronym.Meanings.Count - 1 do begin
    404532      Meaning := TAcronymMeaning(Acronym.Meanings[J]);
    405533      if not Assigned(AcronymDbContent.SearchAcronym(Acronym.Name, Meaning.Name, [sfCaseInsensitive])) then
    406534        MemoReport.Lines.Add(Format(SMissingAcronymSummary, [Acronym.Name, Meaning.Name]));
     535    end;
     536  end;
     537
     538  // With different meaning
     539  for I := 0 to AcronymDbSummary.Acronyms.Count - 1 do begin
     540    Acronym := TAcronym(AcronymDbSummary.Acronyms[I]);
     541    Acronym2 := AcronymDbContent.Acronyms.SearchByName(Acronym.Name);
     542    if Assigned(Acronym2) then begin
     543      if (Acronym.Meanings.Count = 1) and (Acronym2.Meanings.Count = 1) then begin
     544        Meaning := TAcronymMeaning(Acronym.Meanings[0]);
     545        Meaning2 := TAcronymMeaning(Acronym2.Meanings[0]);
     546        if Meaning.Name <> Meaning2.Name then
     547          MemoReport.Lines.Add(Format(SAcronymWithDifferentMeaning, [Acronym.Name, Meaning.Name, Meaning2.Name]));
     548      end else
     549        MemoReport.Lines.Add(Format(SAcronymWithDifferentMeaningCount, [Acronym.Name, Acronym.Meanings.Count, Acronym2.Meanings.Count]));
    407550    end;
    408551  end;
  • trunk/Languages/AcronymDecoder.cs.po

    r191 r192  
    917917msgstr "Opravdu chcete odebrat vybrané kategorie?"
    918918
     919#: uformcheck.sacronymcontentmultiplemeanings
     920msgid "Warning: Content acronym %s has multiple meanings: %s."
     921msgstr "Varování: Zkratka těla dokumentu %s má více významů: %s."
     922
    919923#: uformcheck.sacronymcountcontent
    920924msgctxt "uformcheck.sacronymcountcontent"
     
    927931msgstr "Počet zkratek přehledu:"
    928932
     933#: uformcheck.sacronymsummarymultiplemeanings
     934msgid "Warning: Summary acronym %s has multiple meanings: %s."
     935msgstr "Varování: Zkratka přehledu dokumentu %s má více významů: %s."
     936
     937#: uformcheck.sacronymusedbeforedefined
     938msgid "Acronym %s used before it was defined."
     939msgstr "Zkratka %s použita před určením jejího významu."
     940
     941#: uformcheck.sacronymwithdifferentmeaning
     942msgid "Warning: Acronym %s has different meaning for content \"%s\" and for summary \"%s\"."
     943msgstr "Varování: Zkratka %s má odlišný význam pro tělo dokumentu \"%s\" a pro přehled \"%s\"."
     944
     945#: uformcheck.sacronymwithdifferentmeaningcount
     946msgid "Warning: Acronym %s has different meaning count for content (%d) and for summary (%d)."
     947msgstr "Varování: Zkratka %s má odlišný počet významů pro tělo dokumentu (%d) a pro přehled (%d)."
     948
    929949#: uformcheck.scontentacronyms
    930950msgid "Content acronyms"
     
    950970msgid "Note: Acronym %s is defined as plural in document body."
    951971msgstr "Poznámka: Zkratka %s je definována v těle dokumentu v množném čísle."
     972
     973#: uformcheck.spluralacronymused
     974msgid "Note: Acronym %s is used as plural in document body."
     975msgstr "Poznámka: Zkratka %s je použita v množném čísle v těle dokumentu."
    952976
    953977#: uformcheck.ssummaryacronyms
  • trunk/Languages/AcronymDecoder.po

    r191 r192  
    907907msgstr ""
    908908
     909#: uformcheck.sacronymcontentmultiplemeanings
     910msgid "Warning: Content acronym %s has multiple meanings: %s."
     911msgstr ""
     912
    909913#: uformcheck.sacronymcountcontent
    910914msgctxt "uformcheck.sacronymcountcontent"
     
    917921msgstr ""
    918922
     923#: uformcheck.sacronymsummarymultiplemeanings
     924msgid "Warning: Summary acronym %s has multiple meanings: %s."
     925msgstr ""
     926
     927#: uformcheck.sacronymusedbeforedefined
     928msgid "Acronym %s used before it was defined."
     929msgstr ""
     930
     931#: uformcheck.sacronymwithdifferentmeaning
     932msgid "Warning: Acronym %s has different meaning for content \"%s\" and for summary \"%s\"."
     933msgstr ""
     934
     935#: uformcheck.sacronymwithdifferentmeaningcount
     936msgid "Warning: Acronym %s has different meaning count for content (%d) and for summary (%d)."
     937msgstr ""
     938
    919939#: uformcheck.scontentacronyms
    920940msgid "Content acronyms"
     
    939959#: uformcheck.spluralacronym
    940960msgid "Note: Acronym %s is defined as plural in document body."
     961msgstr ""
     962
     963#: uformcheck.spluralacronymused
     964msgid "Note: Acronym %s is used as plural in document body."
    941965msgstr ""
    942966
  • trunk/UAcronym.pas

    r184 r192  
    7070    function SearchByName(Name: string; Flags: TSearchFlags = []): TAcronymMeaning;
    7171    function AddMeaning(Name: string): TAcronymMeaning;
     72    function GetNames: string;
    7273  end;
    7374
     
    10571058end;
    10581059
     1060function TAcronymMeanings.GetNames: string;
     1061var
     1062  I: Integer;
     1063begin
     1064  Result := '';
     1065  for I := 0 to Count - 1 do
     1066    Result := Result + ', "' + TAcronymMeaning(Items[I]).Name + '"';
     1067  System.Delete(Result, 1, 2);
     1068end;
     1069
    10591070{ TAcronymMeaning }
    10601071
Note: See TracChangeset for help on using the changeset viewer.