goldendict-ng/htmlescape.cc

/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

#include <QString>
#include <QTextDocumentFragment>
#include <QRegExp>
#include "htmlescape.hh"

namespace Html {

// Escapes the characters that are special in HTML markup.
//
// '&' becomes "&amp;", '<' becomes "&lt;", '>' becomes "&gt;" and '"' becomes
// "&quot;". Every other byte, including the single quote, is copied through
// unchanged. Returns the escaped copy; str itself is not modified.
string escape( string const & str )
{
  string result;

  // Lower bound only: every escaped character makes the output longer.
  result.reserve( str.size() );

  // Build the output in one forward pass. Erasing and re-inserting inside the
  // string for every hit would shift the whole tail each time, which gets
  // quadratic on markup-heavy input.
  for( size_t i = 0; i < str.size(); ++i )
  {
    char const ch = str[ i ];

    switch ( ch )
    {
      case '&':
        result += "&amp;";
      break;

      case '<':
        result += "&lt;";
      break;

      case '>':
        result += "&gt;";
      break;

      case '"':
        result += "&quot;";
      break;

      default:
        result += ch;
      break;
    }
  }

  return result;
}

static void storeLineInDiv( string & result, string const & line, bool baseRightToLeft )
{
  result += "<div";
  if( unescape( QString::fromUtf8( line.c_str(), line.size() ) ).isRightToLeft() != baseRightToLeft )
  {
    result += " dir=\"";
    result += baseRightToLeft ? "ltr\"" : "rtl\"";
  }
  result += ">";
  result += line + "</div>";
}

// Converts plain text into HTML, one <div> per input line.
//
// The text is first passed through escape(). While only indentation has been
// seen on a line, each space becomes "&nbsp;" and each tab becomes four
// "&nbsp;"; later whitespace is copied as is. '\n' ends a line, '\r' is
// dropped. A trailing line without '\n' is emitted only if it is not empty.
// Scanning goes through c_str() and therefore stops at the first NUL byte.
// baseRightToLeft is handed on to storeLineInDiv() for every line.
string preformat(string const & str , bool baseRightToLeft )
{
  string const escaped = escape( str );

  string result;
  string line;
  result.reserve( escaped.size() );
  line.reserve( escaped.size() );

  // True until the first non-indentation character of the current line
  bool inIndent = true;

  for( char const * cursor = escaped.c_str(); *cursor != '\0'; ++cursor )
  {
    char const ch = *cursor;

    if ( inIndent && ch == ' ' )
    {
      line += "&nbsp;";
      continue;
    }

    if ( inIndent && ch == '\t' )
    {
      line += "&nbsp;&nbsp;&nbsp;&nbsp;";
      continue;
    }

    switch ( ch )
    {
      case '\n':
        // End of line: flush what we have and start a fresh one
        storeLineInDiv( result, line, baseRightToLeft );
        line.clear();
        inIndent = true;
      break;

      case '\r':
        // Carriage returns are dropped and do not end the indentation
      break;

      default:
        line.push_back( ch );
        inIndent = false;
      break;
    }
  }

  // Flush the last line when the text did not end with a newline
  if( !line.empty() )
    storeLineInDiv( result, line, baseRightToLeft );

  return result;
}

// Escapes a string so that it can be placed inside a quoted JavaScript
// string literal.
//
// Backslash, double quote and single quote get a backslash prepended;
// newline, carriage return and tab are replaced by the two-character
// sequences \n, \r and \t. Every other byte is copied through unchanged.
// Returns the escaped copy; str itself is not modified.
string escapeForJavaScript( string const & str )
{
  string result;

  // Lower bound only: every escaped character makes the output longer.
  result.reserve( str.size() );

  // Single forward pass that appends to the output. Inserting into the middle
  // of the string for every hit would shift the whole tail each time.
  for( size_t i = 0; i < str.size(); ++i )
  {
    char const ch = str[ i ];

    switch ( ch )
    {
      case '\\':
      case '"':
      case '\'':
        result += '\\';
        result += ch;
      break;

      case '\n':
        result += "\\n";
      break;

      case '\r':
        result += "\\r";
      break;

      case '\t':
        result += "\\t";
      break;

      default:
        result += ch;
      break;
    }
  }

  return result;
}

// Turns an HTML fragment into plain text.
//
// A string containing neither '<' nor '&' is returned unchanged. Otherwise
// the tags matched by the first pattern below are replaced with a space, all
// remaining tags are removed, and the trimmed rest is converted to plain text
// via QTextDocumentFragment::fromHtml().
QString unescape( QString const & str )
{
  // No markup and no entities: nothing to strip
  if ( !str.contains( '<' ) && !str.contains( '&' ) )
    return str;

  QRegExp const spacedTags( "<(?:\\s*(?:div|p(?![alr])|br|li(?![ns])|td|blockquote|/ol))[^>]{0,}>",
                            Qt::CaseInsensitive, QRegExp::RegExp2 );
  QRegExp const anyTag( "<[^>]*>", Qt::CaseSensitive, QRegExp::RegExp2 );

  QString stripped( str );
  stripped.replace( spacedTags, " " );
  stripped.remove( anyTag );

  QString const trimmed = stripped.trimmed();
  return QTextDocumentFragment::fromHtml( trimmed ).toPlainText();
}

string unescapeUtf8( const string &str )
{
  return string( unescape( QString::fromUtf8( str.c_str(), str.size() ) ).toUtf8().data() );
}

}
Update year in copyright notices. 2012-02-20 21:47:14 +00:00			`/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>`
Created /trunk/src and moved everything there. 2009-01-28 20:55:45 +00:00			`* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */`

Show decription for Stardict dictionaries 2012-09-07 13:58:45 +00:00			`#include <QString>`
			`#include <QTextDocumentFragment>`
Implement full-text search 2014-04-16 16:18:28 +00:00			`#include <QRegExp>`
Created /trunk/src and moved everything there. 2009-01-28 20:55:45 +00:00			`#include "htmlescape.hh"`

			`namespace Html {`

			`string escape( string const & str )`
			`{`
			`string result( str );`

			`for( size_t x = result.size(); x--; )`
			`switch ( result[ x ] )`
			`{`
			`case '&':`
			`result.erase( x, 1 );`
			`result.insert( x, "&" );`
			`break;`

			`case '<':`
			`result.erase( x, 1 );`
			`result.insert( x, "<" );`
			`break;`

			`case '>':`
			`result.erase( x, 1 );`
			`result.insert( x, ">" );`
			`break;`

			`case '"':`
			`result.erase( x, 1 );`
			`result.insert( x, """ );`
			`break;`

			`default:`
			`break;`
			`}`

			`return result;`
			`}`

Additional detection for text direction in plain text articles 2013-07-13 11:24:58 +00:00			`static void storeLineInDiv( string & result, string const & line, bool baseRightToLeft )`
+ Support for dictd files (.index/.dict(.dz)) added, among with other small accompanying changes. 2009-04-09 18:50:49 +00:00			`{`
Additional detection for text direction in plain text articles 2013-07-13 11:24:58 +00:00			`result += "<div";`
Some more support for RTL languages in articles 2013-07-14 15:37:38 +00:00			`if( unescape( QString::fromUtf8( line.c_str(), line.size() ) ).isRightToLeft() != baseRightToLeft )`
Additional detection for text direction in plain text articles 2013-07-13 11:24:58 +00:00			`{`
			`result += " dir=\"";`
			`result += baseRightToLeft ? "ltr\"" : "rtl\"";`
			`}`
			`result += ">";`
			`result += line + "</div>";`
			`}`
+ Support for dictd files (.index/.dict(.dz)) added, among with other small accompanying changes. 2009-04-09 18:50:49 +00:00
Additional detection for text direction in plain text articles 2013-07-13 11:24:58 +00:00			`string preformat(string const & str , bool baseRightToLeft )`
			`{`
			`string escaped = escape( str ), result, line;`

			`line.reserve( escaped.size() );`
+ Support for dictd files (.index/.dict(.dz)) added, among with other small accompanying changes. 2009-04-09 18:50:49 +00:00			`result.reserve( escaped.size() );`

			`bool leading = true;`

			`for( char const * nextChar = escaped.c_str(); *nextChar; ++nextChar )`
			`{`
			`if ( leading )`
			`{`
			`if ( *nextChar == ' ' )`
			`{`
Additional detection for text direction in plain text articles 2013-07-13 11:24:58 +00:00			`line += " ";`
+ Support for dictd files (.index/.dict(.dz)) added, among with other small accompanying changes. 2009-04-09 18:50:49 +00:00			`continue;`
			`}`
			`else`
			`if ( *nextChar == '\t' )`
			`{`
Additional detection for text direction in plain text articles 2013-07-13 11:24:58 +00:00			`line += "    ";`
+ Support for dictd files (.index/.dict(.dz)) added, among with other small accompanying changes. 2009-04-09 18:50:49 +00:00			`continue;`
			`}`
			`}`

			`if ( *nextChar == '\n' )`
			`{`
Additional detection for text direction in plain text articles 2013-07-13 11:24:58 +00:00			`storeLineInDiv( result, line, baseRightToLeft );`
			`line.clear();`
+ Support for dictd files (.index/.dict(.dz)) added, among with other small accompanying changes. 2009-04-09 18:50:49 +00:00			`leading = true;`
			`continue;`
			`}`

			`if ( *nextChar == '\r' )`
			`continue; // Just skip all \r`

Additional detection for text direction in plain text articles 2013-07-13 11:24:58 +00:00			`line.push_back( *nextChar );`
+ Support for dictd files (.index/.dict(.dz)) added, among with other small accompanying changes. 2009-04-09 18:50:49 +00:00
			`leading = false;`
			`}`

Additional detection for text direction in plain text articles 2013-07-13 11:24:58 +00:00			`if( !line.empty() )`
			`storeLineInDiv( result, line, baseRightToLeft );`

Enhanced support for RTL languages in articles 2013-07-10 13:48:09 +00:00			`return result;`
+ Support for dictd files (.index/.dict(.dz)) added, among with other small accompanying changes. 2009-04-09 18:50:49 +00:00			`}`

+ Add escapeForJavaScript() function, to be used later. 2009-04-12 11:28:56 +00:00			`string escapeForJavaScript( string const & str )`
			`{`
			`string result( str );`

			`for( size_t x = result.size(); x--; )`
			`switch ( result[ x ] )`
			`{`
			`case '\\':`
			`case '"':`
			`case '\'':`
			`result.insert( x, 1, '\\' );`
			`break;`

			`case '\n':`
			`result.erase( x, 1 );`
			`result.insert( x, "\\n" );`
			`break;`

			`case '\r':`
			`result.erase( x, 1 );`
			`result.insert( x, "\\r" );`
			`break;`

			`case '\t':`
			`result.erase( x, 1 );`
			`result.insert( x, "\\t" );`
			`break;`

			`default:`
			`break;`
			`}`

			`return result;`
			`}`

Show decription for Stardict dictionaries 2012-09-07 13:58:45 +00:00			`QString unescape( QString const & str )`
			`{`
			`// Does it contain HTML? If it does, we need to strip it`
			`if ( str.contains( '<' ) \|\| str.contains( '&' ) )`
Implement full-text search 2014-04-16 16:18:28 +00:00			`{`
			`QString tmp = str;`
A little improvement of regexp for html tags stripping 2014-04-18 21:21:56 +00:00			`tmp.replace( QRegExp( "<(?:\\s*(?:div\|p(?![alr])\|br\|li(?![ns])\|td\|blockquote\|/ol))[^>]{0,}>",`
Implement full-text search 2014-04-16 16:18:28 +00:00			`Qt::CaseInsensitive, QRegExp::RegExp2 ), " " );`
			`tmp.remove( QRegExp( "<[^>]*>", Qt::CaseSensitive, QRegExp::RegExp2 ) );`
			`return QTextDocumentFragment::fromHtml( tmp.trimmed() ).toPlainText();`
			`}`
Show decription for Stardict dictionaries 2012-09-07 13:58:45 +00:00			`return str;`
			`}`

Prevent some potential crashes on broken bgl 2012-11-14 12:54:31 +00:00			`string unescapeUtf8( const string &str )`
			`{`
			`return string( unescape( QString::fromUtf8( str.c_str(), str.size() ) ).toUtf8().data() );`
			`}`

Created /trunk/src and moved everything there. 2009-01-28 20:55:45 +00:00			`}`