From fdd1489c82a274615e46e3c67fc5707e3fb0465f Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 9 Jul 2007 11:19:48 +0100 Subject: improved url matching, and texpdf tolerance and indentation levels set --- lib/sisu/v0/plaintext.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/sisu/v0/plaintext.rb') diff --git a/lib/sisu/v0/plaintext.rb b/lib/sisu/v0/plaintext.rb index 4bff976a..f08a0871 100644 --- a/lib/sisu/v0/plaintext.rb +++ b/lib/sisu/v0/plaintext.rb @@ -327,7 +327,8 @@ WOK end para.gsub!(/<:p[bn]>/,'') # remove page breaks para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'') # remove empty lines - check - para.gsub!(/(^|\s)(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") + para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') para.gsub!(/(.+?)<\/a>/m,'\1') para.gsub!(/<:name#\S+?>/,'') # remove name links para.gsub!(/ /,' ') # decide on -- cgit v1.2.3