libtdepim

linklocator.cpp
1
22
23#include "linklocator.h"
24#include "pimemoticons.h"
25#include <tdeglobal.h>
26#include <kstandarddirs.h>
27#include <kstaticdeleter.h>
28#include <kmdcodec.h>
29#include <kdebug.h>
30
31#include <tqstylesheet.h>
32#include <tqfile.h>
33#include <tqregexp.h>
34
35#include <limits.h>
36
// Shared smiley tables. Both start out null and are created lazily by the
// LinkLocator constructor:
//  - s_smileyEmoticonNameMap:   smiley text -> emoticon image name
//  - s_smileyEmoticonHTMLCache: smiley text -> HTML built by getEmoticon()
37TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonNameMap = 0;
38TQMap<TQString, TQString> *LinkLocator::s_smileyEmoticonHTMLCache = 0;
39
// Static deleters; the constructor hands each of the two maps above to one of
// them via setObject().
40static KStaticDeleter< TQMap<TQString, TQString> > smileyMapDeleter;
41static KStaticDeleter< TQMap<TQString, TQString> > smileyCacheDeleter;
42
43LinkLocator::LinkLocator(const TQString& text, int pos)
44 : mText(text), mPos(pos), mMaxUrlLen(4096), mMaxAddressLen(255)
45{
46 // If you change either of the above values for maxUrlLen or
47 // maxAddressLen, then please also update the documentation for
48 // setMaxUrlLen()/setMaxAddressLen() in the header file AND the
49 // default values used for the maxUrlLen/maxAddressLen parameters
50 // of convertToHtml().
51
52 if ( !s_smileyEmoticonNameMap ) {
53 smileyMapDeleter.setObject( s_smileyEmoticonNameMap,
54 new TQMap<TQString, TQString>() );
55 for ( int i = 0; i < EmotIcons::EnumSindex::COUNT; ++i ) {
56 TQString imageName( EmotIcons::EnumSindex::enumToString[i] );
57 imageName.truncate( imageName.length() - 2 ); //remove the _0 bit
58 s_smileyEmoticonNameMap->insert( EmotIcons::smiley(i), imageName );
59 }
60 }
61
62 if ( !s_smileyEmoticonHTMLCache )
63 smileyCacheDeleter.setObject( s_smileyEmoticonHTMLCache,
64 new TQMap<TQString, TQString>() );
65}
66
68{
69 mMaxUrlLen = length;
70}
71
73{
74 return mMaxUrlLen;
75}
76
78{
79 mMaxAddressLen = length;
80}
81
83{
84 return mMaxAddressLen;
85}
86
88{
89 TQString url;
90 if(atUrl())
91 {
92 // handle cases like this: <link>http://foobar.org/</link>
93 int start = mPos;
94 while(mPos < (int)mText.length() && mText[mPos] > ' ' && mText[mPos] != '"' &&
95 TQString("<>()[]").find(mText[mPos]) == -1)
96 {
97 ++mPos;
98 }
99 /* some URLs really end with: # / & - _ */
100 const TQString allowedSpecialChars = TQString("#/&-_");
101 while(mPos > start && mText[mPos-1].isPunct() &&
102 allowedSpecialChars.find(mText[mPos-1]) == -1 )
103 {
104 --mPos;
105 }
106
107 url = mText.mid(start, mPos - start);
108 if(isEmptyUrl(url) || mPos - start > maxUrlLen())
109 {
110 mPos = start;
111 url = "";
112 }
113 else
114 {
115 --mPos;
116 }
117 }
118 return url;
119}
120
121// keep this in sync with KMMainWin::slotUrlClicked()
122bool LinkLocator::atUrl() const
123{
124 // the following characters are allowed in a dot-atom (RFC 2822):
125 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
126 const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
127
128 // the character directly before the URL must not be a letter, a number or
129 // any other character allowed in a dot-atom (RFC 2822).
130 if( ( mPos > 0 ) && ( mText[mPos-1].isLetterOrNumber() ||
131 ( allowedSpecialChars.find( mText[mPos-1] ) != -1 ) ) )
132 return false;
133
134 TQChar ch = mText[mPos];
135 return (ch=='h' && ( mText.mid(mPos, 7) == "http://" ||
136 mText.mid(mPos, 8) == "https://") ) ||
137 (ch=='v' && mText.mid(mPos, 6) == "vnc://") ||
138 (ch=='f' && ( mText.mid(mPos, 7) == "fish://" ||
139 mText.mid(mPos, 6) == "ftp://" ||
140 mText.mid(mPos, 7) == "ftps://") ) ||
141 (ch=='s' && ( mText.mid(mPos, 7) == "sftp://" ||
142 mText.mid(mPos, 6) == "smb://") ) ||
143 (ch=='m' && mText.mid(mPos, 7) == "mailto:") ||
144 (ch=='w' && mText.mid(mPos, 4) == "www.") ||
145 (ch=='f' && mText.mid(mPos, 4) == "ftp.") ||
146 (ch=='n' && mText.mid(mPos, 5) == "news:");
147 // note: no "file:" for security reasons
148}
149
150bool LinkLocator::isEmptyUrl(const TQString& url)
151{
152 return url.isEmpty() ||
153 url == "http://" ||
154 url == "https://" ||
155 url == "fish://" ||
156 url == "ftp://" ||
157 url == "ftps://" ||
158 url == "sftp://" ||
159 url == "smb://" ||
160 url == "vnc://" ||
161 url == "mailto" ||
162 url == "www" ||
163 url == "ftp" ||
164 url == "news" ||
165 url == "news://";
166}
167
169{
170 TQString address;
171
172 if ( mText[mPos] == '@' ) {
173 // the following characters are allowed in a dot-atom (RFC 2822):
174 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~
175 const TQString allowedSpecialChars = TQString(".!#$%&'*+-/=?^_`{|}~");
176
177 // determine the local part of the email address
178 int start = mPos - 1;
179 while ( start >= 0 && mText[start].unicode() < 128 &&
180 ( mText[start].isLetterOrNumber() ||
181 mText[start] == '@' || // allow @ to find invalid email addresses
182 allowedSpecialChars.find( mText[start] ) != -1 ) ) {
183 if ( mText[start] == '@' )
184 return TQString(); // local part contains '@' -> no email address
185 --start;
186 }
187 ++start;
188 // we assume that an email address starts with a letter or a digit
189 while ( ( start < mPos ) && !mText[start].isLetterOrNumber() )
190 ++start;
191 if ( start == mPos )
192 return TQString(); // local part is empty -> no email address
193
194 // determine the domain part of the email address
195 int dotPos = INT_MAX;
196 int end = mPos + 1;
197 while ( end < (int)mText.length() &&
198 ( mText[end].isLetterOrNumber() ||
199 mText[end] == '@' || // allow @ to find invalid email addresses
200 mText[end] == '.' ||
201 mText[end] == '-' ) ) {
202 if ( mText[end] == '@' )
203 return TQString(); // domain part contains '@' -> no email address
204 if ( mText[end] == '.' )
205 dotPos = TQMIN( dotPos, end ); // remember index of first dot in domain
206 ++end;
207 }
208 // we assume that an email address ends with a letter or a digit
209 while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() )
210 --end;
211 if ( end == mPos )
212 return TQString(); // domain part is empty -> no email address
213 if ( dotPos >= end )
214 return TQString(); // domain part doesn't contain a dot
215
216 if ( end - start > maxAddressLen() )
217 return TQString(); // too long -> most likely no email address
218 address = mText.mid( start, end - start );
219
220 mPos = end - 1;
221 }
222 return address;
223}
224
225TQString LinkLocator::convertToHtml(const TQString& plainText, int flags,
226 int maxUrlLen, int maxAddressLen)
227{
228 LinkLocator locator(plainText);
229 locator.setMaxUrlLen(maxUrlLen);
231
232 TQString str;
233 TQString result((TQChar*)0, (int)locator.mText.length() * 2);
234 TQChar ch;
235 int x;
236 bool startOfLine = true;
237 TQString emoticon;
238
239 for (locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length(); locator.mPos++, x++)
240 {
241 ch = locator.mText[locator.mPos];
242 if ( flags & PreserveSpaces )
243 {
244 if (ch==' ')
245 {
246 if (startOfLine) {
247 result += "&nbsp;";
248 locator.mPos++, x++;
249 startOfLine = false;
250 }
251 while (locator.mText[locator.mPos] == ' ')
252 {
253 result += " ";
254 locator.mPos++, x++;
255 if (locator.mText[locator.mPos] == ' ') {
256 result += "&nbsp;";
257 locator.mPos++, x++;
258 }
259 }
260 locator.mPos--, x--;
261 continue;
262 }
263 else if (ch=='\t')
264 {
265 do
266 {
267 result += "&nbsp;";
268 x++;
269 }
270 while((x&7) != 0);
271 x--;
272 startOfLine = false;
273 continue;
274 }
275 }
276 if (ch=='\n')
277 {
278 result += "<br />";
279 startOfLine = true;
280 x = -1;
281 continue;
282 }
283
284 startOfLine = false;
285 if (ch=='&')
286 result += "&amp;";
287 else if (ch=='"')
288 result += "&quot;";
289 else if (ch=='<')
290 result += "&lt;";
291 else if (ch=='>')
292 result += "&gt;";
293 else
294 {
295 const int start = locator.mPos;
296 if ( !(flags & IgnoreUrls) ) {
297 str = locator.getUrl();
298 if (!str.isEmpty())
299 {
300 TQString hyperlink;
301 if(str.left(4) == "www.")
302 hyperlink = "http://" + str;
303 else if(str.left(4) == "ftp.")
304 hyperlink = "ftp://" + str;
305 else
306 hyperlink = str;
307
308 str = str.replace('&', "&amp;");
309 result += "<a href=\"" + hyperlink + "\">" + str + "</a>";
310 x += locator.mPos - start;
311 continue;
312 }
313 str = locator.getEmailAddress();
314 if(!str.isEmpty())
315 {
316 // len is the length of the local part
317 int len = str.find('@');
318 TQString localPart = str.left(len);
319
320 // remove the local part from the result (as '&'s have been expanded to
321 // &amp; we have to take care of the 4 additional characters per '&')
322 result.truncate(result.length() - len - (localPart.contains('&')*4));
323 x -= len;
324
325 result += "<a href=\"mailto:" + str + "\">" + str + "</a>";
326 x += str.length() - 1;
327 continue;
328 }
329 }
330 if ( flags & ReplaceSmileys ) {
331 str = locator.getEmoticon();
332 if ( ! str.isEmpty() ) {
333 result += str;
334 x += locator.mPos - start;
335 continue;
336 }
337 }
338 if ( flags & HighlightText ) {
339 str = locator.highlightedText();
340 if ( !str.isEmpty() ) {
341 result += str;
342 x += locator.mPos - start;
343 continue;
344 }
345 }
346 result += ch;
347 }
348 }
349
350 return result;
351}
352
353TQString LinkLocator::pngToDataUrl( const TQString & iconPath )
354{
355 if ( iconPath.isEmpty() )
356 return TQString();
357
358 TQFile pngFile( iconPath );
359 if ( !pngFile.open( IO_ReadOnly | IO_Raw ) )
360 return TQString();
361
362 TQByteArray ba = pngFile.readAll();
363 pngFile.close();
364 return TQString::fromLatin1("data:image/png;base64,%1")
365 .arg( KCodecs::base64Encode( ba ).data() );
366}
367
368
369TQString LinkLocator::getEmoticon()
370{
371 // smileys have to be prepended by whitespace
372 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
373 return TQString();
374
375 // since smileys start with ':', ';', '(' or '8' short circuit method
376 const TQChar ch = mText[mPos];
377 if ( ch !=':' && ch != ';' && ch != '(' && ch != '8' )
378 return TQString();
379
380 // find the end of the smiley (a smiley is at most 4 chars long and ends at
381 // lineend or whitespace)
382 const int MinSmileyLen = 2;
383 const int MaxSmileyLen = 4;
384 int smileyLen = 1;
385 while ( ( smileyLen <= MaxSmileyLen ) &&
386 ( mPos+smileyLen < (int)mText.length() ) &&
387 !mText[mPos+smileyLen].isSpace() )
388 smileyLen++;
389 if ( smileyLen < MinSmileyLen || smileyLen > MaxSmileyLen )
390 return TQString();
391
392 const TQString smiley = mText.mid( mPos, smileyLen );
393 if ( !s_smileyEmoticonNameMap->contains( smiley ) )
394 return TQString(); // that's not a (known) smiley
395
396 TQString htmlRep;
397 if ( s_smileyEmoticonHTMLCache->contains( smiley ) ) {
398 htmlRep = (*s_smileyEmoticonHTMLCache)[smiley];
399 }
400 else {
401 const TQString imageName = (*s_smileyEmoticonNameMap)[smiley];
402 const TQString iconPath = locate( "emoticons",
403 EmotIcons::theme() +
404 TQString::fromLatin1( "/" ) +
405 imageName + TQString::fromLatin1(".png") );
406 const TQString dataUrl = pngToDataUrl( iconPath );
407 if ( dataUrl.isEmpty() ) {
408 htmlRep = TQString();
409 }
410 else {
411 // create an image tag (the text in attribute alt is used
412 // for copy & paste) representing the smiley
413 htmlRep = TQString("<img class=\"pimsmileyimg\" src=\"%1\" "
414 "alt=\"%2\" title=\"%3\" width=\"16\" height=\"16\"/>")
415 .arg( dataUrl,
416 TQStyleSheet::escape( smiley ),
417 TQStyleSheet::escape( smiley ) );
418 }
419 s_smileyEmoticonHTMLCache->insert( smiley, htmlRep );
420 }
421
422 if ( !htmlRep.isEmpty() )
423 mPos += smileyLen - 1;
424
425 return htmlRep;
426}
427
428TQString LinkLocator::highlightedText()
429{
430 // formating symbols must be prepended with a whitespace
431 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() )
432 return TQString();
433
434 const TQChar ch = mText[mPos];
435 if ( ch != '/' && ch != '*' && ch != '_' )
436 return TQString();
437
438 TQRegExp re = TQRegExp( TQString("\\%1([0-9A-Za-z]+)\\%2").arg( ch ).arg( ch ) );
439 if ( re.search( mText, mPos ) == mPos ) {
440 uint length = re.matchedLength();
441 // there must be a whitespace after the closing formating symbol
442 if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() )
443 return TQString();
444 mPos += length - 1;
445 switch ( ch.latin1() ) {
446 case '*':
447 return "<b>" + re.cap( 1 ) + "</b>";
448 case '_':
449 return "<u>" + re.cap( 1 ) + "</u>";
450 case '/':
451 return "<i>" + re.cap( 1 ) + "</i>";
452 }
453 }
454 return TQString();
455}
456
int maxAddressLen() const
TQString getEmailAddress()
Attempts to grab an email address.
void setMaxUrlLen(int length)
Sets the maximum length of URLs that will be matched by getUrl().
static TQString pngToDataUrl(const TQString &iconPath)
Embed the given PNG image into a data URL.
void setMaxAddressLen(int length)
Sets the maximum length of email addresses that will be matched by getEmailAddress().
static TQString convertToHtml(const TQString &plainText, int flags=0, int maxUrlLen=4096, int maxAddressLen=255)
Converts plaintext into html.
int mPos
The current scan position.
int maxUrlLen() const
LinkLocator(const TQString &text, int pos=0)
Constructs a LinkLocator that will search a plaintext string from a given starting point.
TQString mText
The plaintext string being scanned for URLs and email addresses.
TQString getUrl()
Attempts to grab a URL starting at the current scan position.