mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-12-18 03:14:06 +00:00
Dsl: Handle unescaped '@' symbols
This commit is contained in:
parent
02941bacfb
commit
64afb464ed
24
dsl.cc
24
dsl.cc
|
@ -2256,6 +2256,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
uint32_t offset = curOffset;
|
uint32_t offset = curOffset;
|
||||||
QVector< wstring > insidedHeadwords;
|
QVector< wstring > insidedHeadwords;
|
||||||
unsigned linesInsideCard = 0;
|
unsigned linesInsideCard = 0;
|
||||||
|
int dogLine = 0;
|
||||||
|
|
||||||
// Skip the article's body
|
// Skip the article's body
|
||||||
for( ; ; )
|
for( ; ; )
|
||||||
|
@ -2265,7 +2266,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
|| ( curString.size() && !isDslWs( curString[ 0 ] ) ) )
|
|| ( curString.size() && !isDslWs( curString[ 0 ] ) ) )
|
||||||
{
|
{
|
||||||
if( insideInsided )
|
if( insideInsided )
|
||||||
|
{
|
||||||
|
gdWarning( "Unclosed tag '@' at line %i", dogLine );
|
||||||
insidedCards.append( InsidedCard( offset, curOffset - offset, insidedHeadwords ) );
|
insidedCards.append( InsidedCard( offset, curOffset - offset, insidedHeadwords ) );
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2279,6 +2283,26 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Embedded card tag must be placed at first position in line after spaces
|
||||||
|
bool isEmbeddedCard = true;
|
||||||
|
for( wstring::size_type i = 0; i < n; i++ )
|
||||||
|
{
|
||||||
|
if( !isDslWs( curString[ i ] ) )
|
||||||
|
{
|
||||||
|
isEmbeddedCard = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if( !isEmbeddedCard )
|
||||||
|
{
|
||||||
|
gdWarning( "Unescaped '@' symbol at line %i", scanner.getLinesRead() - 1 );
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dogLine = scanner.getLinesRead() - 1;
|
||||||
|
|
||||||
// Handle embedded card
|
// Handle embedded card
|
||||||
|
|
||||||
|
|
144
dsl_details.cc
144
dsl_details.cc
|
@ -165,6 +165,7 @@ static inline bool checkM( wstring const & dest, wstring const & src )
|
||||||
ArticleDom::ArticleDom( wstring const & str, string const & dictName,
|
ArticleDom::ArticleDom( wstring const & str, string const & dictName,
|
||||||
wstring const & headword_):
|
wstring const & headword_):
|
||||||
root( Node::Tag(), wstring(), wstring() ), stringPos( str.c_str() ),
|
root( Node::Tag(), wstring(), wstring() ), stringPos( str.c_str() ),
|
||||||
|
lineStartPos( str.c_str() ),
|
||||||
transcriptionCount( 0 ),
|
transcriptionCount( 0 ),
|
||||||
dictionaryName( dictName ),
|
dictionaryName( dictName ),
|
||||||
headword( headword_ )
|
headword( headword_ )
|
||||||
|
@ -181,86 +182,96 @@ ArticleDom::ArticleDom( wstring const & str, string const & dictName,
|
||||||
|
|
||||||
if ( ch == L'@' && !escaped )
|
if ( ch == L'@' && !escaped )
|
||||||
{
|
{
|
||||||
// Insided card
|
if( !firstInLine() )
|
||||||
wstring linkTo;
|
|
||||||
nextChar();
|
|
||||||
for( ; ; nextChar() )
|
|
||||||
{
|
{
|
||||||
if( ch == L'\n' )
|
// Not insided card
|
||||||
break;
|
if( dictName.empty() )
|
||||||
if( ch != L'\r' )
|
gdWarning( "Unescaped '@' symbol found" );
|
||||||
linkTo.push_back( ch );
|
else
|
||||||
|
gdWarning( "Unescaped '@' symbol found in \"%s\"", dictName.c_str() );
|
||||||
}
|
}
|
||||||
linkTo = Folding::trimWhitespace( linkTo );
|
else
|
||||||
|
|
||||||
if( !linkTo.empty() )
|
|
||||||
{
|
{
|
||||||
list< wstring > allLinkEntries;
|
// Insided card
|
||||||
expandOptionalParts( linkTo, &allLinkEntries );
|
wstring linkTo;
|
||||||
|
nextChar();
|
||||||
for( list< wstring >::iterator entry = allLinkEntries.begin();
|
for( ; ; nextChar() )
|
||||||
entry != allLinkEntries.end(); )
|
|
||||||
{
|
{
|
||||||
if ( !textNode )
|
if( ch == L'\n' )
|
||||||
|
break;
|
||||||
|
if( ch != L'\r' )
|
||||||
|
linkTo.push_back( ch );
|
||||||
|
}
|
||||||
|
linkTo = Folding::trimWhitespace( linkTo );
|
||||||
|
|
||||||
|
if( !linkTo.empty() )
|
||||||
|
{
|
||||||
|
list< wstring > allLinkEntries;
|
||||||
|
expandOptionalParts( linkTo, &allLinkEntries );
|
||||||
|
|
||||||
|
for( list< wstring >::iterator entry = allLinkEntries.begin();
|
||||||
|
entry != allLinkEntries.end(); )
|
||||||
{
|
{
|
||||||
Node text = Node( Node::Text(), wstring() );
|
if ( !textNode )
|
||||||
|
{
|
||||||
|
Node text = Node( Node::Text(), wstring() );
|
||||||
|
|
||||||
|
if ( stack.empty() )
|
||||||
|
{
|
||||||
|
root.push_back( text );
|
||||||
|
stack.push_back( &root.back() );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
stack.back()->push_back( text );
|
||||||
|
stack.push_back( &stack.back()->back() );
|
||||||
|
}
|
||||||
|
|
||||||
|
textNode = stack.back();
|
||||||
|
}
|
||||||
|
textNode->text.push_back( L'-' );
|
||||||
|
textNode->text.push_back( L' ' );
|
||||||
|
|
||||||
|
// Close the currently opened text node
|
||||||
|
stack.pop_back();
|
||||||
|
textNode = 0;
|
||||||
|
|
||||||
|
wstring linkText = Folding::trimWhitespace( *entry );
|
||||||
|
processUnsortedParts( linkText, true );
|
||||||
|
ArticleDom nodeDom( linkText, dictName, headword_ );
|
||||||
|
|
||||||
|
Node link( Node::Tag(), GD_NATIVE_TO_WS( L"@" ), wstring() );
|
||||||
|
for( Node::iterator n = nodeDom.root.begin(); n != nodeDom.root.end(); ++n )
|
||||||
|
link.push_back( *n );
|
||||||
|
|
||||||
|
++entry;
|
||||||
|
|
||||||
if ( stack.empty() )
|
if ( stack.empty() )
|
||||||
{
|
{
|
||||||
root.push_back( text );
|
root.push_back( link );
|
||||||
stack.push_back( &root.back() );
|
if( entry != allLinkEntries.end() ) // Add line break before next entry
|
||||||
|
root.push_back( Node( Node::Tag(), GD_NATIVE_TO_WS( L"br" ), wstring() ) );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
stack.back()->push_back( text );
|
stack.back()->push_back( link );
|
||||||
stack.push_back( &stack.back()->back() );
|
if( entry != allLinkEntries.end() )
|
||||||
|
stack.back()->push_back( Node( Node::Tag(), GD_NATIVE_TO_WS( L"br" ), wstring() ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
textNode = stack.back();
|
|
||||||
}
|
}
|
||||||
textNode->text.push_back( L'-' );
|
|
||||||
textNode->text.push_back( L' ' );
|
|
||||||
|
|
||||||
// Close the currently opened text node
|
|
||||||
stack.pop_back();
|
|
||||||
textNode = 0;
|
|
||||||
|
|
||||||
wstring linkText = Folding::trimWhitespace( *entry );
|
|
||||||
processUnsortedParts( linkText, true );
|
|
||||||
ArticleDom nodeDom( linkText, dictName, headword_ );
|
|
||||||
|
|
||||||
Node link( Node::Tag(), GD_NATIVE_TO_WS( L"@" ), wstring() );
|
// Skip to next '@'
|
||||||
for( Node::iterator n = nodeDom.root.begin(); n != nodeDom.root.end(); ++n )
|
|
||||||
link.push_back( *n );
|
|
||||||
|
|
||||||
++entry;
|
while( !( ch == L'@' && !escaped ) )
|
||||||
|
nextChar();
|
||||||
|
|
||||||
if ( stack.empty() )
|
stringPos--;
|
||||||
{
|
ch = L'\n';
|
||||||
root.push_back( link );
|
escaped = false;
|
||||||
if( entry != allLinkEntries.end() ) // Add line break before next entry
|
|
||||||
root.push_back( Node( Node::Tag(), GD_NATIVE_TO_WS( L"br" ), wstring() ) );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
stack.back()->push_back( link );
|
|
||||||
if( entry != allLinkEntries.end() )
|
|
||||||
stack.back()->push_back( Node( Node::Tag(), GD_NATIVE_TO_WS( L"br" ), wstring() ) );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Skip to next '@'
|
|
||||||
|
|
||||||
while( !( ch == L'@' && !escaped ) )
|
|
||||||
nextChar();
|
|
||||||
|
|
||||||
stringPos--;
|
|
||||||
ch = L'\n';
|
|
||||||
escaped = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // if ( ch == L'@' )
|
} // if ( ch == L'@' )
|
||||||
|
|
||||||
if ( ch == L'[' && !escaped )
|
if ( ch == L'[' && !escaped )
|
||||||
|
@ -763,8 +774,21 @@ void ArticleDom::nextChar() throw( eot )
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
escaped = false;
|
escaped = false;
|
||||||
|
|
||||||
|
if( ch == '\n' || ch == '\r' )
|
||||||
|
lineStartPos = stringPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ArticleDom::firstInLine()
|
||||||
|
{
|
||||||
|
// Check if current position is first after '\n' and leading spaces
|
||||||
|
if( stringPos <= lineStartPos )
|
||||||
|
return true;
|
||||||
|
for( wchar const * pch = lineStartPos; pch < stringPos - 1; pch++ )
|
||||||
|
if( *pch != ' ' && *pch != '\t' )
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/////////////// DslScanner
|
/////////////// DslScanner
|
||||||
|
|
||||||
|
|
|
@ -82,7 +82,9 @@ private:
|
||||||
void closeTag( wstring const & name, list< Node * > & stack,
|
void closeTag( wstring const & name, list< Node * > & stack,
|
||||||
bool warn = true );
|
bool warn = true );
|
||||||
|
|
||||||
wchar const * stringPos;
|
bool firstInLine();
|
||||||
|
|
||||||
|
wchar const * stringPos, * lineStartPos;
|
||||||
|
|
||||||
class eot {};
|
class eot {};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue