From d29a3e5469d8468084641c385ebf16948f7c2437 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 22 Jul 2008 20:00:59 -0400 Subject: sisu-0.68.0 proposed * middle layer document representation changed, (accounting for substantial patch) * texpdf multiple document sizes as specified in config * numerous small fixes [should on the whole be easier to maintain] --- lib/sisu/v0/manpage.rb | 179 +++++++++++++++++++------------------------------ 1 file changed, 69 insertions(+), 110 deletions(-) (limited to 'lib/sisu/v0/manpage.rb') diff --git a/lib/sisu/v0/manpage.rb b/lib/sisu/v0/manpage.rb index e267de48..06878094 100644 --- a/lib/sisu/v0/manpage.rb +++ b/lib/sisu/v0/manpage.rb @@ -68,6 +68,7 @@ module SiSU_manpage require "#{SiSU_lib}/manpage_format" include Format require "#{SiSU_lib}/shared_txt" + require "#{SiSU_lib}/shared_structure" pwd=Dir.pwd @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 @@tablefoot='' @@ -100,54 +101,6 @@ module SiSU_manpage end end private - class Split_text_object ).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - if /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,$5 - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@lev,@text,@ocn=$1,$2,$3,$4 - elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,"x#{@@alt_id_count}" - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}" - end - else - if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @text,@ocn=$1,$2 - end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06 - @text=/(.+?)/m.match(@para)[1] - end - if /^((\d)~(?:~\S+)?)\s+(.+)/m.match(@para) - @format,@lev,@text=$1,$2,$3 - end - end - format=@format.dup - @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - Format::Format_text_object.new(format,@text,@ocn) - else - Format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>") - end - self - end - end class Scroll \s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m # 2004w18 pb pn removal added + @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m # 2004w18 pb pn removal added @tab="\t" @br="\n" @@dostype='unix endnotes' @@ -171,12 +124,12 @@ module SiSU_manpage end # Used for extraction of endnotes from paragraphs def extract_endnotes(para='') - notes=para.scan(/~[{\[]([\d*+]+\s+.+?)\s*<#@dp>[}\]]~/) + notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m) @n=[] notes.each do |n| #high cost to deal with
appropriately within manpage, consider n=n.dup.to_s - if n =~// - fix = n.split(//) #watch #added + if n =~/#{Mx[:br_line]}/ + fix = n.split(/\s*#{Mx[:br_line]}+\s*/) #watch #added fix.each do |x| unless x.empty?; @n << x end @@ -201,7 +154,7 @@ module SiSU_manpage GSUB ) else - wrap.gsub!(/^(.+)\Z/m, </ + if paragraph =~/#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/ m=$1.to_i - paragraph.gsub!(/<:i#{m}>/,'') + paragraph.gsub!(/#{Mx[:pa_o]}:i#{m}#{Mx[:pa_c]}/,'') util=SiSU_text_utils::Wrap.new(paragraph,78,m*2) else util=SiSU_text_utils::Wrap.new(paragraph,78,0) end @@ -277,6 +230,7 @@ WOK w.gsub!(/^(\\\.)/,' \1') w end + #wrapped.gsub!(/(?:#{Mx[:br_line]}|#{Mx[:br_nl]})\s*/m,"\n\n") if wrapped if lv times=wrapped.length times=78 if times > 78 @@ -312,106 +266,107 @@ WOK table_message='[table omitted, see other document formats]' fix=[] data.each do |para| - para.gsub!(//,'') # remove dummy headings (used by html) #check - para.gsub!(/_\*\s+/,'* ') # bullet markup, marked down - para.gsub!(/(.+?)<\/sup>/,'^\1^') - para.gsub!(/(.+?)<\/sub>/,'[\1]') - para.gsub!(/(.+?)<\/i>/,'<:br>.I \1<:br>') - para.gsub!(/\A(.+?)<\/b>
/m,'<:br>.BI \1<:br>') - para.gsub!(/(.+?)<\/b>/,'<:br>.B \1<:br>') - para.gsub!(/(.+?)<\/u>/,'<:br>.I \1<:br>') - unless para =~/<:code>/ + para.gsub!(/#{Mx[:gl_bullet]}\s*/,'* ') # bullet markup, marked down + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^\1^') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'[\1]') + para.gsub!(/\A\s*#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}#{Mx[:br_line]}/m,"#{Mx[:br_line]}.I \\1#{Mx[:br_line]}") + para.gsub!(/\s*#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/m,"#{Mx[:br_line]}.I \\1#{Mx[:br_line]}") + para.gsub!(/\A\s*#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}#{Mx[:br_line]}/m,"#{Mx[:br_line]}.BI \\1#{Mx[:br_line]}") + para.gsub!(/\s*#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,"#{Mx[:br_line]}.B \\1#{Mx[:br_line]}") + para.gsub!(/\s*#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,"#{Mx[:br_line]}.I \\1#{Mx[:br_line]}") + unless para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ para.gsub!(/(?:^|\s)\{(.+?)\}((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1 #{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") - para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") @manpage[:endnotes]=extract_endnotes(para) - para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)[}\]]~/,'[^\1]') # endnote marker marked up - para.gsub!(/&/,'&') - para.gsub!(/!/,'!') - para.gsub!(/#/,'#') - para.gsub!(/*/,'*') - para.gsub!(/-/,'-') - para.gsub!(///,'/') - para.gsub!(/_/,'_') - para.gsub!(/{/,'{') - para.gsub!(/}/,'}') - para.gsub!(/~/,'~') - para.gsub!(/©/,'©') + para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s*(?:.+?)#{Mx[:en_a_c]}/m,'[^\1]') # endnote marker marked up + para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s*(?:.+?)#{Mx[:en_b_c]}/m,'[^\1]') # endnote marker marked up + #para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)[}\]]~/,'[^\1]') # endnote marker marked up + para.gsub!(/#{Mx[:gl_o]}#amp#{Mx[:gl_c]}/,'&') ##{Mx[:gl_o]}#095#{Mx[:gl_c]} + para.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!') + para.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#') + para.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*') + para.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-') + para.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/') + para.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_') + para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{') + para.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}') + para.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~') + para.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©') end para.gsub!(/(^| |\s|\*)\\\*/,'\1\\\\\*') #man page requires para.gsub!(/\s\.(\S+)/,' \\.\1') para.gsub!(/(\n\.)(\S\S\S+)/m,'\1\\.\2') para.gsub!(/-/,'\-') #manpages use this para.gsub!(/~/,'~') if para #manpages use this - if para =~/<:(?:group|verse|alt|code)(?:-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/ - if para =~/<:code>/ #code-block: angle brackets special characters + if para =~/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_c]}#@dp:#@dp#{Mx[:id_c]})?/ + if para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code-block: angle brackets special characters para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _< para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_< #para.gsub!(/(?![}])_([<>])/m,'\1') # _> _< }_< end - para.gsub!(//,"\n\n.P\n\n") # watch - para.gsub!(/<:(?:group|verse|alt)(?:\\-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,'') - para.gsub!(/<:code>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,"\n\n.nf\n\n") - para.gsub!(/<:code\\-end>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,"\n\n.fi\n\n") + para.gsub!(/(?:#{Mx[:br_line]}|#{Mx[:br_nl]})+\s*/,"\n\n.BR\n\n") # watch + para.gsub!(/#{Mx[:gr_o]}(?:group|verse|alt)(?:\\-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,'') + para.gsub!(/#{Mx[:gr_o]}code#{Mx[:gr_c]}(?:\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,"\n\n.nf\n\n") + para.gsub!(/#{Mx[:gr_o]}code\\-end#{Mx[:gr_c]}(?:\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,"\n\n.fi\n\n") else - para.gsub!(//,"\n\n") #watch introduces a bug + para.gsub!(/(?:#{Mx[:br_line]}|#{Mx[:br_nl]})\s*/,"\n\n") #unless para =~/#{Mx[:lv_o]}\d:/ #watch introduces a bug end blit=para.scan(/\[[^\]]+\]|[^\[]+/) blit_array=[] blit.each do |x| if x =~/^\[/ x.gsub!(/\s+/,' \ ') #manpages use this - elsif x =~/\.(?:TP|BI)\s/ - x.gsub!(/\s+/,' \ ') #manpages use this else x end blit_array << x end - para = blit_array.join - para.gsub!(/\s\\\s+(<:br>|)/,'\1') #a messy solution - para.gsub!(/\s(\[)/,' \ \1') #manpages use this - para.gsub!(/<:p[bn]>/,'') # remove page breaks - para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'') # remove empty lines - check - para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') + para=blit_array.join + para.gsub!(/#{Mx[:gl_o]}:name#\S+?#{Mx[:gl_c]}/mi,'') #added + para.gsub!(/\s\\\s+(#{Mx[:br_line]}|#{Mx[:br_nl]})/,'\1') #a messy solution + para.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'') # remove page breaks + para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') # remove empty lines - check + para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') para.gsub!(/(.+?)<\/a>/m,'\1') - para.gsub!(/<:name#\S+?>/,'') # remove name links + para.gsub!(/#{Mx[:mk_o]}name#\S+?#{Mx[:mk_c]}/,'') # remove name links para.gsub!(/ /,' ') # decide on para.gsub!(/(["''])/,"\\\\\\1") # quotation marks need escape para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") para.gsub!(/^(?:^|[^_\\])\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') wordlist=para.scan(/\S+/) - if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers + if para =~/^#{Rx[:meta]}\s*(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; manpage_metadata(d_meta) end end - if para !~/(^0~||)/ + if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change paranum=para[@regx,3] @p_num=Format::Paragraph_number.new(paranum) end - @sto=Split_text_object.new(para).lev_segname_para_ocn + @sto=SiSU_Structure::Split_text_object.new(@md,para).txt ### problem in scroll, it appears tables are getting paragraph numbers - m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m \ and para=~/\S+/ para=case @sto.format - when /^(1)~(?:(\S+))?/ + when /^(1):(\S*)/ manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body1 - when /^(2)~(?:(\S+))?/ + when /^(2):(\S*)/ manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body2 - when /^(3)~(?:(\S+))?/ + when /^(3):(\S*)/ manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body3 - when /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object + when /^(4):(\S+)/ # work on see SiSU_text_parts::Split_text_object manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body4 - when /^(5)~(?:(\S+))?/ + when /^(5):(\S*)/ manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body5 - when /^(6)~(?:(\S+))?/ + when /^(6):(\S*)/ manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body6 #when /^(i1)$/ @@ -440,17 +395,17 @@ WOK elsif para =~/#{table_message}/ @manpage[:body] << para << @br elsif para =~/(Note|Endnotes?)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ elsif para =~/(MetaData)/ \ - and para =~/<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info ####suspect visit + and para =~/#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info ####suspect visit #formatMono=MonoSiSU.new('
MetaData') #para=formatMono.bold_para elsif para.include? 'Owner Details' \ - and para !~/<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #formatMono=MonoSiSU.new('
Owner Details') #@@manpage[:owner_details]=formatMono.bold_para #para='' - elsif para =~/(¡|(.*)/ one,two=$1,$2 format_text=Format_text_object.new(one,two) @@ -461,7 +416,7 @@ WOK para='' end case para - when /<:i[1-9]>/ + when /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/ if para =~/.*<:#>.*$/m format_text=Format_text_object.new(para,'') para=format_text.scr_indent_one_no_paranum @@ -475,10 +430,14 @@ WOK format_text=Format_text_object.new(one,two) para=format_text.center end - para.gsub!(/~/,'~') if para #manpages use this - para.gsub!(/{/,'{') if para #manpages use this + para.gsub!(/\s(\[)/m,' \ \1') if para #manpages use this + para.gsub!(/(?:#{Mx[:br_line]}|#{Mx[:br_nl]})/,"\n\n") if para + para.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~') if para #manpages use this + para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{') if para #manpages use this + para.gsub!(/#{Mx[:pa_o]}\S+#{Mx[:pa_c]}/,' ') if para ## Clean Prepared Text para.gsub!(//,' ') if para ## Clean Prepared Text para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text + para end end @manpage -- cgit v1.2.3