Dsl: Handle unescaped '@' symbols

This commit is contained in:
Abs62 2017-07-03 18:12:22 +03:00
parent 02941bacfb
commit 64afb464ed
3 changed files with 111 additions and 61 deletions

24
dsl.cc
View file

@ -2256,6 +2256,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
uint32_t offset = curOffset; uint32_t offset = curOffset;
QVector< wstring > insidedHeadwords; QVector< wstring > insidedHeadwords;
unsigned linesInsideCard = 0; unsigned linesInsideCard = 0;
int dogLine = 0;
// Skip the article's body // Skip the article's body
for( ; ; ) for( ; ; )
@ -2265,7 +2266,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|| ( curString.size() && !isDslWs( curString[ 0 ] ) ) ) || ( curString.size() && !isDslWs( curString[ 0 ] ) ) )
{ {
if( insideInsided ) if( insideInsided )
{
gdWarning( "Unclosed tag '@' at line %i", dogLine );
insidedCards.append( InsidedCard( offset, curOffset - offset, insidedHeadwords ) ); insidedCards.append( InsidedCard( offset, curOffset - offset, insidedHeadwords ) );
}
break; break;
} }
@ -2279,6 +2283,26 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
continue; continue;
} }
else
{
// Embedded card tag must be placed at first position in line after spaces
bool isEmbeddedCard = true;
for( wstring::size_type i = 0; i < n; i++ )
{
if( !isDslWs( curString[ i ] ) )
{
isEmbeddedCard = false;
break;
}
}
if( !isEmbeddedCard )
{
gdWarning( "Unescaped '@' symbol at line %i", scanner.getLinesRead() - 1 );
continue;
}
}
dogLine = scanner.getLinesRead() - 1;
// Handle embedded card // Handle embedded card

View file

@ -165,6 +165,7 @@ static inline bool checkM( wstring const & dest, wstring const & src )
ArticleDom::ArticleDom( wstring const & str, string const & dictName, ArticleDom::ArticleDom( wstring const & str, string const & dictName,
wstring const & headword_): wstring const & headword_):
root( Node::Tag(), wstring(), wstring() ), stringPos( str.c_str() ), root( Node::Tag(), wstring(), wstring() ), stringPos( str.c_str() ),
lineStartPos( str.c_str() ),
transcriptionCount( 0 ), transcriptionCount( 0 ),
dictionaryName( dictName ), dictionaryName( dictName ),
headword( headword_ ) headword( headword_ )
@ -181,86 +182,96 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName,
if ( ch == L'@' && !escaped ) if ( ch == L'@' && !escaped )
{ {
// Insided card if( !firstInLine() )
wstring linkTo;
nextChar();
for( ; ; nextChar() )
{ {
if( ch == L'\n' ) // Not insided card
break; if( dictName.empty() )
if( ch != L'\r' ) gdWarning( "Unescaped '@' symbol found" );
linkTo.push_back( ch ); else
gdWarning( "Unescaped '@' symbol found in \"%s\"", dictName.c_str() );
} }
linkTo = Folding::trimWhitespace( linkTo ); else
if( !linkTo.empty() )
{ {
list< wstring > allLinkEntries; // Insided card
expandOptionalParts( linkTo, &allLinkEntries ); wstring linkTo;
nextChar();
for( list< wstring >::iterator entry = allLinkEntries.begin(); for( ; ; nextChar() )
entry != allLinkEntries.end(); )
{ {
if ( !textNode ) if( ch == L'\n' )
break;
if( ch != L'\r' )
linkTo.push_back( ch );
}
linkTo = Folding::trimWhitespace( linkTo );
if( !linkTo.empty() )
{
list< wstring > allLinkEntries;
expandOptionalParts( linkTo, &allLinkEntries );
for( list< wstring >::iterator entry = allLinkEntries.begin();
entry != allLinkEntries.end(); )
{ {
Node text = Node( Node::Text(), wstring() ); if ( !textNode )
{
Node text = Node( Node::Text(), wstring() );
if ( stack.empty() )
{
root.push_back( text );
stack.push_back( &root.back() );
}
else
{
stack.back()->push_back( text );
stack.push_back( &stack.back()->back() );
}
textNode = stack.back();
}
textNode->text.push_back( L'-' );
textNode->text.push_back( L' ' );
// Close the currently opened text node
stack.pop_back();
textNode = 0;
wstring linkText = Folding::trimWhitespace( *entry );
processUnsortedParts( linkText, true );
ArticleDom nodeDom( linkText, dictName, headword_ );
Node link( Node::Tag(), GD_NATIVE_TO_WS( L"@" ), wstring() );
for( Node::iterator n = nodeDom.root.begin(); n != nodeDom.root.end(); ++n )
link.push_back( *n );
++entry;
if ( stack.empty() ) if ( stack.empty() )
{ {
root.push_back( text ); root.push_back( link );
stack.push_back( &root.back() ); if( entry != allLinkEntries.end() ) // Add line break before next entry
root.push_back( Node( Node::Tag(), GD_NATIVE_TO_WS( L"br" ), wstring() ) );
} }
else else
{ {
stack.back()->push_back( text ); stack.back()->push_back( link );
stack.push_back( &stack.back()->back() ); if( entry != allLinkEntries.end() )
stack.back()->push_back( Node( Node::Tag(), GD_NATIVE_TO_WS( L"br" ), wstring() ) );
} }
textNode = stack.back();
} }
textNode->text.push_back( L'-' );
textNode->text.push_back( L' ' );
// Close the currently opened text node
stack.pop_back();
textNode = 0;
wstring linkText = Folding::trimWhitespace( *entry );
processUnsortedParts( linkText, true );
ArticleDom nodeDom( linkText, dictName, headword_ );
Node link( Node::Tag(), GD_NATIVE_TO_WS( L"@" ), wstring() ); // Skip to next '@'
for( Node::iterator n = nodeDom.root.begin(); n != nodeDom.root.end(); ++n )
link.push_back( *n );
++entry; while( !( ch == L'@' && !escaped ) )
nextChar();
if ( stack.empty() ) stringPos--;
{ ch = L'\n';
root.push_back( link ); escaped = false;
if( entry != allLinkEntries.end() ) // Add line break before next entry
root.push_back( Node( Node::Tag(), GD_NATIVE_TO_WS( L"br" ), wstring() ) );
}
else
{
stack.back()->push_back( link );
if( entry != allLinkEntries.end() )
stack.back()->push_back( Node( Node::Tag(), GD_NATIVE_TO_WS( L"br" ), wstring() ) );
}
} }
// Skip to next '@'
while( !( ch == L'@' && !escaped ) )
nextChar();
stringPos--;
ch = L'\n';
escaped = false;
} }
} // if ( ch == L'@' ) } // if ( ch == L'@' )
if ( ch == L'[' && !escaped ) if ( ch == L'[' && !escaped )
@ -763,8 +774,21 @@ void ArticleDom::nextChar() throw( eot )
} }
else else
escaped = false; escaped = false;
if( ch == '\n' || ch == '\r' )
lineStartPos = stringPos;
} }
bool ArticleDom::firstInLine()
{
// Check if current position is first after '\n' and leading spaces
if( stringPos <= lineStartPos )
return true;
for( wchar const * pch = lineStartPos; pch < stringPos - 1; pch++ )
if( *pch != ' ' && *pch != '\t' )
return false;
return true;
}
/////////////// DslScanner /////////////// DslScanner

View file

@ -82,7 +82,9 @@ private:
void closeTag( wstring const & name, list< Node * > & stack, void closeTag( wstring const & name, list< Node * > & stack,
bool warn = true ); bool warn = true );
wchar const * stringPos; bool firstInLine();
wchar const * stringPos, * lineStartPos;
class eot {}; class eot {};