From d29a3e5469d8468084641c385ebf16948f7c2437 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 22 Jul 2008 20:00:59 -0400 Subject: sisu-0.68.0 proposed * middle layer document representation changed, (accounting for substantial patch) * texpdf multiple document sizes as specified in config * numerous small fixes [should on the whole be easier to maintain] --- lib/sisu/v0/html_segments.rb | 152 ++++++++++++++++++++++--------------------- 1 file changed, 77 insertions(+), 75 deletions(-) (limited to 'lib/sisu/v0/html_segments.rb') diff --git a/lib/sisu/v0/html_segments.rb b/lib/sisu/v0/html_segments.rb index a2d06ed9..a15c302e 100644 --- a/lib/sisu/v0/html_segments.rb +++ b/lib/sisu/v0/html_segments.rb @@ -103,9 +103,9 @@ module SiSU_HTML_seg @h_sfx=@md.sfx if @md.file_type =~/html/ @h_sfx='.html' if @md.file_type =~/html/ #used in creating file, not to be omitted. data.each do |para| - if para =~/^4~/ - @@seg_name << para[/^4~(\S+)/,1] - seg_name=para[/^4~(\S+)/,1] + if para =~/^#{Mx[:lv_o]}4:/ + @@seg_name << para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/,1] + seg_name=para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/,1] @@seg_ad[seg_name]=para[/.+?<:\d\s+(.+)\s*?>/,1] #watch end end @@ -116,36 +116,36 @@ module SiSU_HTML_seg tell.segmented unless @md.cmd =~/q/ flagend='y' data.each do |para| - if para =~/^4~.+/ #watch - if para =~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - @@header4=para.to_s[/^4~(?:\S+\s+)?(.+?)<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,1] - else @@header4=para.to_s[/^4~(?:\S+\s+)?(.+)/,1] + if para =~/^#{Mx[:lv_o]}4:/ #watch + if para =~/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + @@header4=para.to_s[/^#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}(.+?)#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,1] + else @@header4=para.to_s[/^#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}(.+)/,1] end @@is4=newfile=1 end - if para =~/^3~.+/ - @@header3=para.to_s[/^3~(?:~\S+\s+)?(.+)/,1] + if para =~/^#{Mx[:lv_o]}3:\S*?#{Mx[:lv_c]}/ + @@header3=para.to_s[/^#{Mx[:lv_o]}3:\S*?#{Mx[:lv_c]}\s*?(.+)/,1] @@is4,@@is3=0,1 end - if para =~/^2~.+/ - @@header2=para.to_s[/^2~(?:~\S+\s+)?(.+)/,1] + if para =~/^#{Mx[:lv_o]}2:\S*?#{Mx[:lv_c]}/ + @@header2=para.to_s[/^#{Mx[:lv_o]}2:\S*?#{Mx[:lv_c]}\s*?(.+)/,1] @@is4,@@is3,@@is2=0,0,1 end - if para =~/^1~.+/ - @@header1=para.to_s[/^1~(?:~\S+\s+)?(.+)/,1] + if para =~/^#{Mx[:lv_o]}1:\S*?#{Mx[:lv_c]}/ + @@header1=para.to_s[/^#{Mx[:lv_o]}1:\S*?#{Mx[:lv_c]}\s*?(.+)/,1] @@is4,@@is3,@@is2,@@is1=0,0,0,1 end if (@@is1 && !@@is2 && !@@is3 && !@@is4) - unless para =~/^1~/; head1=$_ #; + unless para =~/^#{Mx[:lv_o]}1:/; head1=$_ #; end end if @@is4 == 1 \ - or para =~/^|^/ + or para =~/^#{Mx[:br_endnotes]}|^#{Mx[:br_eof]}/ if newfile == 1 \ - or para =~/^|^/ + or para =~/^#{Mx[:br_endnotes]}|^#{Mx[:br_eof]}/ newfile=0 - if para =~/^4~\S+/ \ - or para =~/^|^/ # @@level4 + if para =~/^#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}/ \ + or para =~/^#{Mx[:br_endnotes]}|^#{Mx[:br_eof]}/ # @@level4 if tracking != 0 mkdir_p(@md.dir_out) unless FileTest.directory?(@md.dir_out) #bug - added specifically for nav! not needed by regular seg, check !!! Seg.new('',@md).tail @@ -174,8 +174,8 @@ module SiSU_HTML_seg tracking=tracking + 1 end m=para[/.+?.*/]; @@get_hash_to=$1 if m # changed 2002w42, again w44 ! & again 2003w16 - m=para[/^4~(\S+)/]; @@get_hash_fn=$1 if m - para=if para =~//um; para.split(/\n/) + m=para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/]; @@get_hash_fn=$1 if m + para=if para =~/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s*c|#{Mx[:gr_o]}(?:code|alt|verse|group)#{Mx[:gr_c]}/um; para.split(/\n/) else para end if para.class == String @@ -195,7 +195,7 @@ module SiSU_HTML_seg end def header_art(para) format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md) - if para =~/^[0-6]~/ #2004w27/5 + if para =~/^#{Mx[:lv_o]}[1-6]:/ #2004w27/5 if @@tracker < @@seg_total-1; @@seg[:dot_nav]=format_head_seg.dot_control_pre_next else @@seg[:dot_nav]=format_head_seg.dot_control_pre end @@ -204,7 +204,7 @@ module SiSU_HTML_seg @@seg[:title]=format_head_seg.head << ads.div.major end def head(para) - clean=/|<:.*?>|<~\d+;(?:[ohum]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + clean=/|#{Mx[:gr_o]}:.*?#{Mx[:gr_c]}|<:.*?>|#{Mx[:id_o]}~\d+;(?:[ohum]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md) if @@tracker < @@seg_total-1 if @@tracker == 0; @@segtocband=format_head_seg.toc_next2 #if format_head_seg.toc_next2 @@ -218,7 +218,7 @@ module SiSU_HTML_seg @@seg[:tocband] << format_head_seg.navigation_band(@@segtocband,@@seg[:dot_nav]) @@seg[:headers] << format_head_seg.seg_head_escript if SiSU_HTML_Format_type::Head_seg.method_defined? :seg_head_escript #debug PHP move up in text #bug @@seg[:headers] << format_head_seg.title_banner(@md.title,@md.subtitle,@dc_creator).gsub(clean,'') - paranum=if @@header1[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1 + paranum=if @@header1[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/]; $1 else '' end @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) @@ -228,7 +228,7 @@ module SiSU_HTML_seg end if @@is2 == 1 header2=@@header2 - paranum=if header2[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1 + paranum=if header2[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/]; $1 else '' end @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) @@ -238,7 +238,7 @@ module SiSU_HTML_seg end if @@is3 == 1 header3=@@header3 - paranum=if header3[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1 + paranum=if header3[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/]; $1 else '' end @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) @@ -248,7 +248,7 @@ module SiSU_HTML_seg end if @@is4 == 1 header4=@@header4 - paranum=if header4[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1 + paranum=if header4[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/]; $1 else '' end @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) @@ -261,47 +261,48 @@ module SiSU_HTML_seg def markup(para) @debug=[] format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md) - if para !~/^0~/ - m=para[/.+?<~(\d+);(?:[ohm]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/] + if para !~/^#{Rx[:meta]}/ + m=para[/.+?#{Mx[:id_o]}~(\d+);(?:[ohm]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/] if m paranum=m[1].to_s @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) end - if para =~/<:(?:code|alt|verse|group)>/m \ + if para =~/#{Mx[:gr_o]}(?:code|alt|verse|group)#{Mx[:gr_c]}/m \ or @@flag_alt==true - if para =~/<:(?:code|alt|verse|group)>/m + if para =~/#{Mx[:gr_o]}(?:code|alt|verse|group)#{Mx[:gr_c]}/m @group_collect=[] #unless @group_collect.class == Array - @group_collect << @vz.margin_txt_0 + para.gsub(/<:(?:code|alt|verse|group)-end>/m,'') #watch ! + @group_collect << @vz.margin_txt_0 + para.gsub(/#{Mx[:gr_o]}(?:code|alt|verse|group)-end#{Mx[:gr_c]}/m,'') #watch ! @@flag_alt=true elsif @@flag_alt==true - @group_collect << if para !~/<:(?:code|alt|verse|group)-end>/m # neither ideal nor necessary sort later + @group_collect << if para !~/#{Mx[:gr_o]}(?:code|alt|verse|group)-end#{Mx[:gr_c]}/m # neither ideal nor necessary sort later para else - para.gsub(/<:(?:code|alt|verse|group)-end>/m,'') + para.gsub(/#{Mx[:gr_o]}(?:code|alt|verse|group)-end#{Mx[:gr_c]}/m,'') end end - if para =~/<:(?:code|alt|verse|group)-end>/m + if para =~/#{Mx[:gr_o]}(?:code|alt|verse|group)-end#{Mx[:gr_c]}/m para=@group_collect.flatten.join @@flag_alt=false @group_collect=[] end end - if para !~/^[0-9]~/ - if para =~/(.*)<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>(.*)/ + if para !~/^#{Mx[:lv_o]}[1-9]:|#{Rx[:meta]}/ + if para =~/(.*)#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}(.*)/ one,two=$1,$2 format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) para=format_seg.no_paranum end end - if para[/<~(\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+><#@dp:#@dp)>$/] - @sto=SiSU_HTML::Source::Split_text_object.new(@md,para).lev_segname_para_ocn - format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,@sto.text) if @sto.format =~/i[1-9]|_[1-9]?\*|<:i[1-9]>\s*_\*|null/ + if para[/#{Mx[:id_o]}~(\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp)#{Mx[:id_c]}$/] + @sto=SiSU_HTML::Source::Split_text_object.new(@md,para).html_seg + format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,@sto.text) if @sto.format =~/i[1-9]|null/ #watch para=case @sto.format # work area 2003w29 ||@|def lev_segname_para_ocn| - when /^4~\S+/; @sto.seg_lev_para_ocn.header4 # work on see Split_text_object - when /^5~(?:~\S+)?/; @sto.seg_lev_para_ocn.header5 - when /^6~(?:~\S+)?/; @sto.seg_lev_para_ocn.header6 - when /^_\*$/; @sto.seg_lev_para_ocn.bullet - when /^_([1-9])\*$/ #indent levels 1-9 with bullet + when /^4:/; @sto.seg_lev_para_ocn.header4 # work on see Split_text_object + when /^5:/; @sto.seg_lev_para_ocn.header5 + when /^6:/; @sto.seg_lev_para_ocn.header6 + when /^#{Mx[:gl_bullet]}/ + @sto.seg_lev_para_ocn.bullet + when /^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}#{Mx[:gl_bullet]}/ #indent levels 1-9 with bullet format_txt_obj.gsub_body para=@sto.seg_lev_para_ocn.format('li',"i#{$1}") when /^i([1-9])$/ #indent levels 1-9 @@ -313,18 +314,18 @@ module SiSU_HTML_seg @sto.seg_lev_para_ocn.code when /null/ if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ \ - and para !~/^/ + and para !~/^#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ format_txt_obj.gsub_body @sto.seg_lev_para_ocn.para elsif para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ \ - and para =~/^/ + and para =~/^#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ format_txt_obj.gsub_body @sto.seg_lev_para_ocn.table_end else para end else para end - elsif para =~/¡|/,%{" href=\"endnotes#{@md.sfx}#_\\1">}) #endnote- twice #removed file type end if para !~/#{@vz.margin_txt_w1}|#{@vz.margin_txt_w2}/ - if para[/(.*)<~0;(?:u|[0-6]:)\d+;\w\d+><#@dp:#@dp>(.*)/] #% watch u & m? + if para[/(.*)#{Mx[:id_o]}~0;(?:u|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}(.*)/] #% watch u & m? one,two=$1,$2 format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) - para=format_seg.seg_no_paranum #% undefined +#FIX --> not that undefined, rather is not needed, should not be visited, and remove + #para=format_seg.seg_no_paranum #% undefined end para.gsub!(/\s*(-\{{2}~\d+|<:e[:_]\d+>).*/,'') #potentially dagerous - removes all paragraphs with #?? workpoint if para =~/ / #endnote- note- @@ -343,8 +345,8 @@ module SiSU_HTML_seg para=format_seg.no_paranum end end - if para =~/^4~\S+|4~!/ - para.gsub!(/4~\S+|<:[-_\w\d]?(-.+?-)?>|4~!.+/,'') #sort seg headers + if @sto.format=~/4:\S+/ + para.gsub!(/^\s*4:\S+\s*|<:[-_\w\d]?(-.+?-)?>|4~!.+/m,'') #sort seg headers @@seg[:main] << para @@seg[:main] << @@seg_subtoc[@@get_hash_fn] #% insertion of sub-toc else @@ -398,39 +400,39 @@ module SiSU_HTML_seg data.each do |para| para.gsub!(/(.+?)<\/a>/mi,'\1') if @md.flag_auto_endnotes - if para =~/^[1234]~/ \ + if para =~/^#{Mx[:lv_o]}[1234]:/ \ and not @@fn.empty? @@seg_endnotes[@@fn]=[] @@seg_endnotes[@@fn] << @@seg_endnotes_array - @@seg_endnotes_array=[] if para=~/^4~/ - @@fns_previous=@md.fns if para=~/^1~meta/ + @@seg_endnotes_array=[] if para=~/^#{Mx[:lv_o]}4:/ + @@fns_previous=@md.fns if para=~/^#{Mx[:lv_o]}1:meta#{Mx[:lv_c]}/ end - if para =~/^4~/ #% EXTRACTION OF SUB-TOCs + if para =~/^#{Mx[:lv_o]}4:/ #% EXTRACTION OF SUB-TOCs @@seg_subtoc[@@fn]=@@seg_subtoc_array @@seg_subtoc_array=[] end - if para =~/^4~/ #% SEGMENT NAME, after EXTRACTION OF ENDNOTES & SUB-TOCs - if para !~/^4~metadata/ - m=para[/^4~(\S+).+?<~(\d+);(?:[oh]|4:)\d+;\w\d+><#@dp:#@dp>$/] + if para =~/^#{Mx[:lv_o]}4:/ #% SEGMENT NAME, after EXTRACTION OF ENDNOTES & SUB-TOCs + if para !~/^#{Mx[:lv_o]}4:metadata#{Mx[:lv_c]}/ + m=para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}.+?#{Mx[:id_o]}~(\d+);(?:[oh]|4:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/] end if m; @@fn,@@to_lev4=$1,$2 if m # changed 2004w07 #endnotes and sub-tocs else - if para !~/^4~metadata/ - m=para[/^4~(\S+)/] + if para !~/^#{Mx[:lv_o]}4:metadata#{Mx[:lv_c]}/ + m=para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/] @@fn,@@to_lev4=$1,'nonum' if m # changed 2005w13 else @@fn='' end end end end - if para =~/^[56]~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/ + if para =~/^#{Mx[:lv_o]}[56]:\S*?#{Mx[:lv_c]}\s*(.+)?#{Mx[:id_o]}~(\d+);(?:h|[56]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ para.gsub!(/ <\/a>/,' ') case para # series changed 2002w42 - when /^5~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/ #remove [u]? req by pg texts, revist + when /^#{Mx[:lv_o]}5:\S*?#{Mx[:lv_c]}\s*(.+)?#{Mx[:id_o]}~(\d+);(?:h|[56]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #remove [u]? req by pg texts, revist one,two=$1,$2 format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) para=format_seg.subtoc_lev5 - when /^6~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/ + when /^#{Mx[:lv_o]}6:\S*?#{Mx[:lv_c]}\s*(.+)?#{Mx[:id_o]}~(\d+);(?:h|[56]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ one,two=$1,$2 format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) para=format_seg.subtoc_lev6 @@ -438,27 +440,27 @@ module SiSU_HTML_seg @@seg_subtoc_array << para end if @md.flag_auto_endnotes - if para =~/~[{\[][\d*+]+ / # endnote- + if para =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[\d*+]+ /) try.each do |e| format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,e) - note_match=if e =~/<:i[1-9]>/ + note_match=if e =~/#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]}/ format_seg.endnote_body_seg_tail_indent else format_seg.endnote_body_seg_tail end @@ -466,14 +468,14 @@ module SiSU_HTML_seg end try.join('
') #% creation of separate end segment/page of all endnotes referenced back to reference segment - m=/(?:~\{[\d*+]+|~\[[*+]\d+)\s+(.+?href=")(#-[\d*+]+".+)[}\]]~/mi + m=/(?:#{Mx[:en_a_o]}[\d*+]+|#{Mx[:en_b_o]}[*+]\d+)\s+(.+?href=")(#-[\d*+]+".+)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/mi one=note_match_seg[m,1] #note~ [a name] two=note_match_seg[m,2] #note- format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) note_match_all_seg=format_seg.endnote_seg_body(@@fn) #BUG WATCH 200408 @@seg[:endnote_all] << note_match_all_seg end - para.gsub!(/~[{\[].+?[}\]]~\s*/m,' ') + para.gsub!(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]}).+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})\s*/m,' ') end end end -- cgit v1.2.3