diff options
author | Ralph Amissah <ralph@amissah.com> | 2007-07-09 11:19:48 +0100 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2007-07-09 11:19:48 +0100 |
commit | fdd1489c82a274615e46e3c67fc5707e3fb0465f (patch) | |
tree | 1241b19a94118352dbb16cbc3f02483a51919ce8 /lib/sisu/v0/plaintext.rb | |
parent | sisu-0.55.2 + md5s (diff) |
improved url matching, and texpdf tolerance and indentation levels set
Diffstat (limited to 'lib/sisu/v0/plaintext.rb')
-rw-r--r-- | lib/sisu/v0/plaintext.rb | 3 |
1 files changed, 2 insertions, 1 deletions
```diff
diff --git a/lib/sisu/v0/plaintext.rb b/lib/sisu/v0/plaintext.rb
index 4bff976a..f08a0871 100644
--- a/lib/sisu/v0/plaintext.rb
+++ b/lib/sisu/v0/plaintext.rb
@@ -327,7 +327,8 @@ WOK
 end
 para.gsub!(/<:p[bn]>/,'') # remove page breaks
 para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'') # remove empty lines - check
-para.gsub!(/(^|\s)(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3")
+para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3")
+para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3')
 para.gsub!(/<a href=".+?">(.+?)<\/a>/m,'\1')
 para.gsub!(/<:name#\S+?>/,'') # remove name links
 para.gsub!(/ /,' ') # decide on
```