From d29a3e5469d8468084641c385ebf16948f7c2437 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 22 Jul 2008 20:00:59 -0400 Subject: sisu-0.68.0 proposed * middle layer document representation changed, (accounting for substantial patch) * texpdf multiple document sizes as specified in config * numerous small fixes [should on the whole be easier to maintain] --- lib/sisu/v0/html_tune.rb | 56 +++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 25 deletions(-) (limited to 'lib/sisu/v0/html_tune.rb') diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index 8cd8841e..11976af9 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -101,6 +101,8 @@ module SiSU_Tune @html=html end def clean + @html.gsub!(/#{Mx[:gl_o]}(#[0-9]+)#{Mx[:gl_c]}/u,'&\1;') + @html.gsub!(/#{Mx[:gl_o]}#([a-z]{2,4})#{Mx[:gl_c]}/u,'&\1;') @html.gsub!(/¢/u,'¢') # ¢ @html.gsub!(/£/u,'£') # £ @html.gsub!(/¥/u,'¥') # ¥ @@ -208,9 +210,7 @@ module SiSU_Tune tell.txt_grey unless @md.cmd =~/q/ data=Tune.new(@data,@md).endnotes_html data=Tune.new(data,@md).url_markup - if @sys.locale =~/utf-?8/i - data=Tune.new(data,@md).utf8_markup - end + data=Tune.new(data,@md).markup if @md.cmd =~/M/ #Hard Output Tune Optional on/off here data=Output.new(data,@md).hard_output Output.new(data,@md).marshal @@ -224,22 +224,30 @@ module SiSU_Tune data=@data @tuned_file=[] data.each do |para| - para.gsub!(/(\d~(\S+))/,'\1#\2. ') + para.gsub!(/#{Mx[:lv_o]}\d:(\S?)#{Mx[:lv_c]}/,'\0#\1. ') @tuned_file << para end end - def utf8_markup + def markup @tuned_file=[] @data.each do |para| #@utf8.new(para).html #@utf8.html(@para) - if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn - #¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûü - #¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷ - ##para.gsub!(//, '&#;') - ##para.gsub!(//, '&;') - para=SiSU_Tune::Clean_html.new(para).clean - end + para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') + para.gsub!(/(?:\s*#{Mx[:br_page]}\s*|\s*#{Mx[:br_page_new]}\s*)+/m,'


') # else clean '' + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') + para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') + para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') + para.gsub!(/<(p|br)>/,'<\1 />') + para=SiSU_Tune::Clean_html.new(para).clean @tuned_file << para end end @@ -287,12 +295,10 @@ module SiSU_Tune data=@data @tuned_file=[] data.each do |para| - para.gsub!(/<:name\#(\S+?)>/,'') + #para.gsub!(/#{Mx[:mk_o]}name#(\S+?)#{Mx[:mk_c]}/,'') para.gsub!(/<-#>/,'') - para.gsub!(/<:p[bn]>/,'') - para.gsub!(/<(p|br)>/,'<\1 />') - para.gsub!(/<:br>/,'
') - unless para =~/^<:code>/ + #para.gsub!(/<(p|br)>/,'<\1 />') + unless para =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ if para =~/<::\s+/ #watch para.gsub!(/<::\s+(\S+?)\s+!>/, %{\\1}) @@ -308,11 +314,11 @@ module SiSU_Tune %{}) end if para =~/\{.+?\}((?:https?|file|ftp)\S+|image)/ - @word_mode=para.scan(/\{.+?\}(?:(?:https?|file|ftp)\S+|image)|\S+/) + @word_mode=para.scan(/\{.+?\}(?:(?:https?|file|ftp)\S+|image)|(?:#{Mx[:gl_o]}\S+?#{Mx[:gl_c]})+|\S+/u) words=urls(@word_mode) para.gsub!(/.+/m,words) end - if (para !~/^0~|^<:code>/) + if (para !~/^#{Mx[:meta_o]}|^#{Mx[:gr_o]}code#{Mx[:gr_c]}/) para.gsub!(/\\copyright/i,%{©}) if (para !~/\<:ad\s+\.\.\//) para.gsub!(/\<:ad\s+(\S+)?\s+(\S+\.png)\s+(.+)?\;\s+(.+)?\;\s*!\>/, @@ -327,11 +333,11 @@ module SiSU_Tune para.gsub!(/<:to(\d{1,7}?)>/,'to { \1 } ') if para =~/\b\S+\@\S+?\.\S+/ \ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/ - para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'<\1>\2') + para.gsub!(/([a-zA-Z0-9._-]+\@\S+?\.[a-zA-Z0-9._-]+)/,'<\1>') end para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration if para =~/..\/\S+/ \ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/ para.gsub!(/(\.\.\/\S+)/,'\1') @@ -349,14 +355,14 @@ module SiSU_Tune data=@data @tuned_file=[] data.each do |para| - unless para =~/^<:code>/ - para.gsub!(/(~[{])(\d+) (.+?) <#@dp>([}]~)/, + unless para =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ + para.gsub!(/(#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(\d+)\s+(.+?) #{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/, '  \2  ' + #note- endnote- '\1\2  \2. \3 \4') #endnote- note- (careful may have switched) - para.gsub!(/(~\[)([*+]\d+) (.+?) <#@dp>(\]~)/, + para.gsub!(/(#{Mx[:en_b_o]})([*+]\d+)\s+(.+?) #{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_b_c]})/, '  \2  ' + #note- endnote- '\1\2  \2. \3 \4') #endnote- note- (careful may have switched) - para.gsub!(/(~\{)([*+]+) (.+?) <#@dp>(\}~)/, + para.gsub!(/(#{Mx[:en_a_o]})([*+]+)\s+(.+?) #{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_a_c]})/, '  \2  ' + #note- endnote- '\1\2  \2 \3 \4') #endnote- note- (careful may have switched) end -- cgit v1.2.3