aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRalph Amissah <ralph@amissah.com>2007-07-30 09:06:51 +0100
committerRalph Amissah <ralph@amissah.com>2007-07-30 09:06:51 +0100
commita99e0de5885441989c2ae9ae6fad15fd35d0bb97 (patch)
treec78f7a8ef21b06edd5e69b4396e45edc7b59aeb2
parentusing postive lookahead for url matching, test if to change (diff)
url matching, semi-colon as possible terminator, in dal match https
-rw-r--r--CHANGELOG10
-rw-r--r--lib/sisu/v0/dal_doc_str_code.rb2
-rw-r--r--lib/sisu/v0/dal_syntax.rb6
-rw-r--r--lib/sisu/v0/html_tune.rb10
-rw-r--r--lib/sisu/v0/odf.rb20
-rw-r--r--lib/sisu/v0/shared_html_lite.rb10
-rw-r--r--lib/sisu/v0/shared_xml.rb4
-rw-r--r--lib/sisu/v0/texpdf_format.rb9
8 files changed, 40 insertions, 31 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 733ce3ba..c7f444aa 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,6 +6,16 @@ Reverse Chronological:
%% STABLE MANIFEST
+%% sisu_0.55.7.orig.tar.gz (2007-07-30:31/1)
+http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.7.orig.tar.gz
+ sisu_0.55.7.orig.tar.gz
+ sisu_0.55.7-1.dsc
+ sisu_0.55.7-1.diff.gz
+
+ * url matching refinement
+ * add semi-colon as possible url terminator
+ * dal match https
+
%% sisu_0.55.6.orig.tar.gz (2007-07-28:30/6)
http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz
69368f8eb4da28d07f3a1ee1ea5b89f3 1271022 sisu_0.55.6.orig.tar.gz
diff --git a/lib/sisu/v0/dal_doc_str_code.rb b/lib/sisu/v0/dal_doc_str_code.rb
index e6a3ae1e..18ac03d8 100644
--- a/lib/sisu/v0/dal_doc_str_code.rb
+++ b/lib/sisu/v0/dal_doc_str_code.rb
@@ -147,7 +147,7 @@ module SiSU_document_structure_code
if line =~/\S/ and line !~/^(?:alt|code|group|poem)\{|^\}(?:alt|code|group|poem)|<:(?:code|verse|alt|group).+/
line.gsub!(/\s\s/,'&nbsp;&nbsp;')
line.gsub!(/^/,'<:codeline>') if type=='code' # try sort for texpdf special case
- if line =~/http:\/\/\S+$/
+ if line =~/https?:\/\/\S+$/
line.gsub!(/$/,' <:br>')
else
line.gsub!(/$/,'<:br>') #unless type=='code'
diff --git a/lib/sisu/v0/dal_syntax.rb b/lib/sisu/v0/dal_syntax.rb
index ce5fdc72..4fb0f5d3 100644
--- a/lib/sisu/v0/dal_syntax.rb
+++ b/lib/sisu/v0/dal_syntax.rb
@@ -65,7 +65,7 @@ module Syntax
@data,@md=data,md
@vz=SiSU_Env::Get_init.instance.skin
@data_new=[]
- @http_m='\{.+?\}http://\S+|http:\S+|\.\.\/\S+|\S+?\.png\b|[*]~\S+|^0~.+|<:(?:code|group|alt|verse)(?:-end)?>|<:br>'
+ @http_m='\{.+?\}https?://\S+|https?:\S+|\.\.\/\S+|\S+?\.png\b|[*]~\S+|^0~.+|<:(?:code|group|alt|verse)(?:-end)?>|<:br>'
@manmkp_ital='[i/]\\{.+?\\}[i/]'
tail_m_ital=%q{(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$)}
tail_m_bold=%q{(?:(?:<\/i>)?(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$))?}
@@ -283,8 +283,8 @@ module Syntax
else line.gsub!(/(<br \/>)/i,"\\1\n")
end
else #code blocks
- line.gsub!(/(^|\s)(http:\/\/\S+)/,'\1_\2') #line.gsub!(/(^|\s)(http:\/\/\S+)/,"\\1\\\\\\2") #escape urls
- line.gsub!(/(^|\s)<(http:\/\/\S+)>([\s,.]|$)/,'\1\2\3') #clean/unescape urls with decoration, re-apply decoration later
+ line.gsub!(/(^|\s)(https?:\/\/\S+)/,'\1_\2') #line.gsub!(/(^|\s)(http:\/\/\S+)/,"\\1\\\\\\2") #escape urls
+ line.gsub!(/(^|\s)<(https?:\/\/\S+)>([\s,.]|$)/,'\1\2\3') #clean/unescape urls with decoration, re-apply decoration later
line.gsub!(/<:codeline>/,"\n")
end
line
diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb
index ac8d6594..cca41056 100644
--- a/lib/sisu/v0/html_tune.rb
+++ b/lib/sisu/v0/html_tune.rb
@@ -245,8 +245,8 @@ module SiSU_Tune
@words=[]
data.each do |word|
@words << if word=~/\{(.+?)\}((?:https?|ftp)\S+|image)/
- if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/
- m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/.match(word).captures
+ if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/
+ m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/.match(word).captures
else m,u=/\{(.+?)\}((?:https?|ftp)\S+|image)/.match(word).captures
d=''
end
@@ -325,9 +325,9 @@ module SiSU_Tune
if (para =~/\b\S+\@\S+?\.\S+/ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/)
para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'&lt;<a href="mailto:\1">\1</a>&gt;\2')
end
- para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration
- para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration
+ para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url
+ para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/)
para.gsub!(/(\.\.\/\S+)/,'<a href="\1">\1</a>')
end
diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb
index ff788116..6025dfb2 100644
--- a/lib/sisu/v0/odf.rb
+++ b/lib/sisu/v0/odf.rb
@@ -272,21 +272,21 @@ module SiSU_ODF
end
para
end
- def text_link_odf(txt,url)
+ def text_link_odf(txt,url,trail)
txt.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-(
url.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-(
- %{<text:a xlink:type="simple" xlink:href="#{url}">#{txt}</text:a>}
+ %{<text:a xlink:type="simple" xlink:href="#{url}">#{txt.strip}</text:a>#{trail}}
end
def text_link(para)
para.gsub!(@serial,'')
- m=para.scan(/(\{([^}]+?)\}((?:https?|ftp)\S+))/) #sort
+ m=para.scan(/(\{([^}]+?)\}((?:https?|ftp)\S+?))([;.,]?$)/) #sort
if m
m.each do |i|
- txt,url=i[1],i[2]
+ txt,url,trail=i[1],i[2]
txt.gsub!(/([)(\]\[])/,"\\\\\\1")
txt.gsub!(/([+?])/,"\\\\\\1") # problems with +
url.gsub!(/([+?])/,"\\\\\\1") # problems with +
- para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url)) #make sure trailing ']' are not caught in url
+ para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url,trail)) #make sure trailing ']' are not caught in url
para.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix
end
m=nil
@@ -295,13 +295,13 @@ module SiSU_ODF
end
def normal(para) #P1 - P3
para.gsub!(@serial,'')
- para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,
%{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration
- para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,
%{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #special case \{ e.g. \}http://url
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,
+ para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
%{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
- #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, also works
+ #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, also works
#%{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration
para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/,
%{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}})
@@ -377,7 +377,7 @@ module SiSU_ODF
parray=[]
para.split(/<:?br(?: \/)?>/).each do |parablock|
parablock=group_clean(parablock)
- parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,
%{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration
parray << %{<text:p text:style-name="P5">#{parablock}</text:p>} if parablock =~/\S+/
end
diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb
index 43fb4446..1218aa79 100644
--- a/lib/sisu/v0/shared_html_lite.rb
+++ b/lib/sisu/v0/shared_html_lite.rb
@@ -88,8 +88,8 @@ module SiSU_Format_Shared
@words=[]
data.each do |word|
@words << if word=~/\{(.+?)\}((?:https?|ftp)\S+|image)/
- if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/
- m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/.match(word).captures
+ if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/
+ m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/.match(word).captures
else m,u=/\{(.+?)\}((?:https?|ftp)\S+|image)/.match(word).captures
d=''
end
@@ -131,9 +131,9 @@ module SiSU_Format_Shared
words=word_mode.join(' ')
para.gsub!(/.+/,words)
end
- para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration
- para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration
+ para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url
+ para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
para
end
def paragraph
diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb
index c30dc5db..c54ab42d 100644
--- a/lib/sisu/v0/shared_xml.rb
+++ b/lib/sisu/v0/shared_xml.rb
@@ -360,11 +360,11 @@ module SiSU_XML_munge
'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune
#para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,
# '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>\3') #watch, compare html_tune
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,
+ para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
%{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3})
#para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, #also works
#%{#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\1">\\1</link>#{@url_brace.xml_close}\\2})
- para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later
#para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\2</link>\3') #escaped urls not linked, deal with later
para.gsub!(/&nbsp;/,' ') #clean
para
diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb
index 81646f23..92333d28 100644
--- a/lib/sisu/v0/texpdf_format.rb
+++ b/lib/sisu/v0/texpdf_format.rb
@@ -423,8 +423,7 @@ WOK
end
@string
end
- def special_characters_1(para)
- # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list
+ def special_characters_1(para) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list
#p @@utf_8.list
#@string=Iconv.conv('ISO-8859-1', 'UTF-8', @string)
word=@string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/
@@ -791,9 +790,9 @@ WOK
@words=[]
@string.each do |word|
@words << if word=~/\{.+?\}(?:https?|ftp):\S+/
- if word =~/\\\{(.+?)\\\}((?:https?|ftp)\S+?)([.,](?:\s|$))/
- r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+?)(?:[.,](?:\s|$)|(?:\s|$))/
- d=/\\\{.+?\\?\}(?:https?|ftp):\S+([.,](?:\s|$))/.match(word).captures.to_s
+ if word =~/\\\{(.+?)\\\}((?:https?|ftp)\S+?)([;.,](?:\s|$))/
+ r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+?)(?:[;.,](?:\s|$)|(?:\s|$))/
+ d=/\\\{.+?\\?\}(?:https?|ftp):\S+([;.,](?:\s|$))/.match(word).captures.to_s
else
r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+)/
d=''