/* This file is (c) 2008-2012 Konstantin Isakov * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ #include #include #include #include "htmlescape.hh" namespace Html { string escape( string const & str ) { string result( str ); for( size_t x = result.size(); x--; ) switch ( result[ x ] ) { case '&': result.erase( x, 1 ); result.insert( x, "&" ); break; case '<': result.erase( x, 1 ); result.insert( x, "<" ); break; case '>': result.erase( x, 1 ); result.insert( x, ">" ); break; case '"': result.erase( x, 1 ); result.insert( x, """ ); break; default: break; } return result; } static void storeLineInDiv( string & result, string const & line, bool baseRightToLeft ) { result += ""; } string preformat(string const & str , bool baseRightToLeft ) { string escaped = escape( str ), result, line; line.reserve( escaped.size() ); result.reserve( escaped.size() ); bool leading = true; for( char const * nextChar = escaped.c_str(); *nextChar; ++nextChar ) { if ( leading ) { if ( *nextChar == ' ' ) { line += " "; continue; } else if ( *nextChar == '\t' ) { line += "    "; continue; } } if ( *nextChar == '\n' ) { storeLineInDiv( result, line, baseRightToLeft ); line.clear(); leading = true; continue; } if ( *nextChar == '\r' ) continue; // Just skip all \r line.push_back( *nextChar ); leading = false; } if( !line.empty() ) storeLineInDiv( result, line, baseRightToLeft ); return result; } string escapeForJavaScript( string const & str ) { string result( str ); for( size_t x = result.size(); x--; ) switch ( result[ x ] ) { case '\\': case '"': case '\'': result.insert( x, 1, '\\' ); break; case '\n': result.erase( x, 1 ); result.insert( x, "\\n" ); break; case '\r': result.erase( x, 1 ); result.insert( x, "\\r" ); break; case '\t': result.erase( x, 1 ); result.insert( x, "\\t" ); break; default: break; } return result; } QString unescape( QString const & str, bool saveFormat ) { // Does it contain HTML? If it does, we need to strip it if ( str.contains( '<' ) || str.contains( '&' ) ) { QString tmp = str; if( !saveFormat ) { tmp.replace( QRegularExpression( "<(?:\\s*/?(?:div|h[1-6r]|q|p(?![alr])|br|li(?![ns])|td|blockquote|[uo]l|pre|d[dl]|nav|address))[^>]{0,}>", QRegularExpression::CaseInsensitiveOption ), " " ); tmp.replace( QRegularExpression( "<[^>]*>"), " "); } return QTextDocumentFragment::fromHtml( tmp.trimmed() ).toPlainText(); } return str; } QString fromHtmlEscaped( QString const & str){ QString retVal = str; QRegularExpression regExp("(?\\<\\;)|(?\\>\\;)|(?\\&\\;)|(?\\"\\;)", QRegularExpression::PatternOption::CaseInsensitiveOption); auto match = regExp.match(str, 0); while (match.hasMatch()) { if (!match.captured("lt").isEmpty()) { retVal.replace(match.capturedStart("lt"), match.capturedLength("lt"), "<"); } else if (!match.captured("gt").isEmpty()) { retVal.replace(match.capturedStart("gt"), match.capturedLength("gt"), ">"); } else if (!match.captured("amp").isEmpty()) { retVal.replace(match.capturedStart("amp"), match.capturedLength("amp"), "&"); } else if (!match.captured("quot").isEmpty()) { retVal.replace(match.capturedStart("quot"), match.capturedLength("quot"), "\""); } match = regExp.match(retVal, match.capturedStart() + 1); } return retVal; } string unescapeUtf8( const string &str, bool saveFormat ) { return string( unescape( QString::fromUtf8( str.c_str(), str.size() ) ).toUtf8().data(), saveFormat ); } }