From d29a3e5469d8468084641c385ebf16948f7c2437 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 22 Jul 2008 20:00:59 -0400 Subject: sisu-0.68.0 proposed * middle layer document representation changed, (accounting for substantial patch) * texpdf multiple document sizes as specified in config * numerous small fixes [should on the whole be easier to maintain] --- lib/sisu/v0/concordance.rb | 49 +- lib/sisu/v0/conf.rb | 1 + lib/sisu/v0/constants.rb | 109 ++- lib/sisu/v0/css.rb | 8 +- lib/sisu/v0/dal.rb | 451 +++++----- lib/sisu/v0/dal_doc_str.rb | 83 +- lib/sisu/v0/dal_doc_str_code.rb | 40 +- lib/sisu/v0/dal_doc_str_tables.rb | 28 +- lib/sisu/v0/dal_syntax.rb | 261 +++--- lib/sisu/v0/db_import.rb | 145 ++-- lib/sisu/v0/defaults.rb | 187 +++++ lib/sisu/v0/digests.rb | 55 +- lib/sisu/v0/help.rb | 2 +- lib/sisu/v0/html.rb | 145 ++-- lib/sisu/v0/html_format.rb | 19 +- lib/sisu/v0/html_format_css.rb | 6 +- lib/sisu/v0/html_scroll.rb | 59 +- lib/sisu/v0/html_segments.rb | 152 ++-- lib/sisu/v0/html_table.rb | 34 +- lib/sisu/v0/html_tune.rb | 56 +- lib/sisu/v0/hub.rb | 4 +- lib/sisu/v0/manifest.rb | 58 +- lib/sisu/v0/manpage.rb | 179 ++-- lib/sisu/v0/manpage_format.rb | 4 +- lib/sisu/v0/odf.rb | 190 ++--- lib/sisu/v0/odf_format.rb | 52 +- lib/sisu/v0/param.rb | 40 +- lib/sisu/v0/particulars.rb | 3 +- lib/sisu/v0/plaintext.rb | 162 ++-- lib/sisu/v0/plaintext_format.rb | 4 +- lib/sisu/v0/shared_html_lite.rb | 37 +- lib/sisu/v0/shared_structure.rb | 151 ++++ lib/sisu/v0/shared_txt.rb | 180 ++-- lib/sisu/v0/shared_xml.rb | 110 +-- lib/sisu/v0/sst_do_inline_footnotes.rb | 6 +- lib/sisu/v0/sst_to_s_xml_dom.rb | 50 +- lib/sisu/v0/sst_to_s_xml_node.rb | 153 ++-- lib/sisu/v0/sst_to_s_xml_sax.rb | 48 +- lib/sisu/v0/sysenv.rb | 30 +- lib/sisu/v0/texinfo.rb | 58 +- lib/sisu/v0/texinfo_format.rb | 70 +- lib/sisu/v0/texpdf.rb | 417 +++++---- lib/sisu/v0/texpdf_format.rb | 1449 ++++++++++++++++---------------- lib/sisu/v0/urls.rb | 2 + lib/sisu/v0/wikispeak.rb | 48 +- lib/sisu/v0/xhtml.rb | 87 +- lib/sisu/v0/xml.rb | 91 +- lib/sisu/v0/xml_dom.rb | 81 +- lib/sisu/v0/xml_fictionbook.rb | 6 +- lib/sisu/v0/xml_format.rb | 4 +- lib/sisu/v0/xml_scaffold.rb | 4 +- lib/sisu/v0/xml_tables.rb | 61 +- 52 files changed, 3148 insertions(+), 2581 deletions(-) create mode 100644 lib/sisu/v0/shared_structure.rb (limited to 'lib') diff --git a/lib/sisu/v0/concordance.rb b/lib/sisu/v0/concordance.rb index 1b777bb5..f62b20ac 100644 --- a/lib/sisu/v0/concordance.rb +++ b/lib/sisu/v0/concordance.rb @@ -183,18 +183,18 @@ WOK @path="#{@env.path.output}/#{@md.fnb}" @freq=Hash.new(0) @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - @rxp_to=Regexp.new("<~(\\d+);(?:[oh]|[0-6]:)\\d+;\\w\\d+><#@dp:#@dp>$") - @rxp_lv1=Regexp.new('^1~') #line start markers removed, ('^1~') for exceptions \n\n4{{{ - @rxp_lv2=Regexp.new('^2~') - @rxp_lv3=Regexp.new('^3~') - @rxp_seg=Regexp.new('^4~(.+?)\s+') - @rxp_title=Regexp.new('^0~title\s*(.+?)\s*$') + @rxp_to=Regexp.new("#{Mx[:id_o]}~(\\d+);(?:[oh]|[0-6]:)\\d+;\\w\\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}|#{Mx[:id_o]}\S+?#{Mx[:id_c]}$") + @rxp_lv1=/^#{Mx[:lv_o]}1:/ + @rxp_lv2=/^#{Mx[:lv_o]}2:/ + @rxp_lv3=/^#{Mx[:lv_o]}3:/ + @rxp_seg=/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/ + @rxp_title=Regexp.new("^#{Mx[:meta_o]}title#{Mx[:meta_c]}\s*(.+?)\s*$") @rxp_t1=Regexp.new('^T1') @rxp_t2=Regexp.new('^T2') @rxp_t3=Regexp.new('^T3') - @rxp_excluded1=/(?:https?|file|ftp):\/\/\S+/mi - @rxp_excluded0=/^(?:to\d+|\d+| |EOF|thumb_\S+|snap_\S+|_+|-+|ii+|iv|vi+|ix|xi+|xiv|xv|xvi+|xix|xx|\S+?_\S+|[\d_]+\w\S+|[\w\d]{1,2}|\d{1,3}\w?|#@dp|[0-9a-f]{16,64}|\d{2,3}x\d{2,3}|\S{0,2}sha\d|\S{0,3}\d{4}w\d\d|\b\w\d+|\d_all\b|e\.?g\.?)$/mi #this regex causes and cures a stack dump in ruby 1.9 !!! - @rgx_scanlist=%r{(?:(?:[a-zA-Z0-9"\s]){2,7}|(?:[a-zA-Z0-9"\s]){2,7}|(?:https?|file)://\S+)|code\{.+?\}code|<\S+?>|\w+}mi + @rxp_excluded1=/(?:https?|file|ftp):\/\/\S+/ + @rxp_excluded0=/^(?:#{Mx[:fa_bold_o]}|#{Mx[:fa_italics_o]})?(?:to\d+|\d+| |#{Mx[:br_endnotes]}|EOF|#{Mx[:br_eof]}|thumb_\S+|snap_\S+|_+|-+|[(]?(?:ii+|iv|vi+|ix|xi+|xiv|xv|xvi+|xix|xx)[).]?|\S+?_\S+|[\d_]+\w\S+|[\w\d]{1,2}|\d{1,3}\w?|#@dp|[0-9a-f]{16,64}|\d{2,3}x\d{2,3}|\S{0,2}sha\d|\S{0,3}\d{4}w\d\d|\b\w\d+|\d_all\b|e\.?g\.?)(?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})?$/mi #this regex causes and cures a stack dump in ruby 1.9 !!! + @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|#{Mx[:gr_o]}code#{Mx[:gr_o]}.+?#{Mx[:gr_o]}code-end#{Mx[:gr_o]}|<\S+?>|#{Mx[:id_o]}\S+?#{Mx[:id_c]}|\w+|[a-zA-Z]+}mi rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error end end @@ -231,20 +231,39 @@ WOK @seg,toy=nil,nil @word_map={} @dal_array.each do |line| - if line !~/<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/ # lines to ignore: # are added but not part of authors substantive text; 0 are mostly machine generated - if line =~@rxp_seg; @seg=line[@rxp_seg,1] + if line !~/#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #lines to ignore: # are added but not part of authors substantive text; 0 are mostly machine generated + if line =~@rxp_seg; @seg=line[@rxp_seg,1] end - if line =~@rxp_to; toy=line[@rxp_to,1] + if line =~@rxp_to; toy=line[@rxp_to,1] end if toy =~/\d+/ \ and toy !~/^0$/ for word in line.scan(@rgx_scanlist) #%take in word or other match + #word.gsub!(@rxp_clean,'') + word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,'') + word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') + word.gsub!(/#{Mx[:gl_o]}#[a-z]+#{Mx[:gl_c]}/,'') + word.gsub!(/#{Mx[:gl_o]}#[0-9]+#{Mx[:gl_c]}/,'') + word.gsub!(/^\S$/,'') + word=nil if word.empty? word=nil if word =~@rxp_excluded0 #watch word=nil if word =~@rxp_excluded1 #watch + word=nil if word =~/^\S$/ if word - #word.gsub!(/<\/?[i]>/,'') + word.gsub!(/#{Mx[:br_nl]}|#{Mx[:br_line]}/,' ') + word.gsub!(/#{Mx[:lv_o]}\d:\S*?#{Mx[:lv_c]}/,'') + word.gsub!(/#{Mx[:pa_o]}:i\d#{Mx[:pa_c]}/,'') + word.gsub!(/#{Mx[:id_o]}~\d+;\S+?#{Mx[:id_c]}/,'') + word.gsub!(/#{Mx[:fa_o]}[a-z]{1,7}#{Mx[:fa_o_c]}|#{Mx[:fa_c_o]}[a-z]{1,7}#{Mx[:fa_c]}/,'') + word.gsub!(/#{Mx[:mk_o]}(?:[0-9a-f]{32}:[0-9a-f]{32}|[0-9a-f]{64}:[0-9a-f]{64})#{Mx[:mk_c]}/,'') + word.gsub!(/#{Mx[:mk_o]}(?:[0-9a-f]{32}|[0-9a-f]{64})#{Mx[:mk_c]}/,'') + word.gsub!(/#{Mx[:en_a_o]}(?:\d|[*+])*|#{Mx[:en_b_o]}(?:\d|[*+])*|#{Mx[:en_a_c]}|#{Mx[:en_b_c]}/mi,'') + word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,''); word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') + #word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_c]}/,'') #watch word.gsub!(/<\/?\S+?>/,'') + word.gsub!(/^\@+/,'') word.strip! + word.gsub!(/#{Mx[:tc_p]}.+/,'') word.gsub!(/[\.,;:"]$/,'') word.gsub!(/["]/,'') word.gsub!(/^\s*[\(]/,'') @@ -252,8 +271,12 @@ WOK word.gsub!(/^(?:See|e\.?g\.?).+/,'') word.gsub!(/^\s*[.,;:]\s*/,'') word.strip! + word.gsub!(/^\(?[a-zA-Z]\)$/,'') word.gsub!(/^\d+(st|nd|rd|th)$/,'') word.gsub!(/^(\d+\.?)+$/, '') + word.gsub(/#{Mx[:mk_o]}|#{Mx[:mk_c]}/,'') + word.gsub!(/^\S$/,'') + word=nil if word =~/^\S$/ word=nil if word =~/^\s*$/ #watch if word unless word =~/[A-Z][A-Z]/ \ diff --git a/lib/sisu/v0/conf.rb b/lib/sisu/v0/conf.rb index 639847ab..5fc9343c 100644 --- a/lib/sisu/v0/conf.rb +++ b/lib/sisu/v0/conf.rb @@ -111,6 +111,7 @@ module SiSU_Initialize tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','Copy images','') tell.colorize unless @opt.cmd =~/q/ SiSU_Env::Create_site.new(@opt.cmd).cp_local_images + SiSU_Env::Create_site.new(@opt.cmd).cp_webserver_images_local #this should not have been necessary SiSU_Env::Create_site.new(@opt.cmd).cp_base_images #base images (nav etc.) used by all html end def cp_external_images diff --git a/lib/sisu/v0/constants.rb b/lib/sisu/v0/constants.rb index 1fa40fb1..68093673 100644 --- a/lib/sisu/v0/constants.rb +++ b/lib/sisu/v0/constants.rb @@ -1,9 +1,10 @@ +# coding:utf-7 =begin * Name: SiSU * Description: a framework for document structuring, publishing and search - #___# + constants * Author: Ralph Amissah @@ -59,6 +60,110 @@ ** Description: system environment, resource control and configuration details =end - +Mx,Rx={},{} +Mx[:meta_o],Mx[:meta_c]='〔@','〕' +Mx[:lv_o_1],Mx[:lv_o_2],Mx[:lv_o_3],Mx[:lv_o_4],Mx[:lv_o_5],Mx[:lv_o_6],Mx[:lv_o_7],Mx[:lv_o_8],Mx[:lv_o_9]= + '〔1:','〔2:','〔3:','〔4:','〔5:','〔6:','〔7:','〔8:','〔9:'; +Mx[:lv_o],Mx[:lv_c]='〔','〕' +# '1~','2~','3~','4~','5~','6~','7~','8~','9~'; Mx[:lv_c]='' +Mx[:en_a_o]='【'; Mx[:en_a_c]='】' #Mx[:en_a_o]='~{'; Mx[:en_a_c]='}~' +Mx[:en_b_o]='〖'; Mx[:en_b_c]='〗' #Mx[:en_b_o]='~['; Mx[:en_b_c]=']~' +Mx[:br_line]="#{Mx[:mk_o]}br#{Mx[:mk_c]}" +Mx[:gr_o]='〔'; Mx[:gr_c]='〕' #group text mark +Mx[:id_o]='〔'; Mx[:id_c]='〕' #object id mark +Mx[:tc_o]='『'; Mx[:tc_c]='』' #table column mark +Mx[:tc_p]='┆' #table row/misc mark +Mx[:pa_o]='〔'; Mx[:pa_c]='〕' #affects paragraph mark +Mx[:mk_o]='〔'; Mx[:mk_c]='〕' #generic mark +Mx[:gl_o]='〔'; Mx[:gl_c]='〕' #glyph +Mx[:fa_o]='〔'; Mx[:fa_o_c]='¤'; Mx[:fa_c_o]='¤'; Mx[:fa_c]='〕' +Mx[:fa_bold_o]= "#{Mx[:fa_o]}b#{Mx[:fa_o_c]}"; Mx[:fa_bold_c]= "#{Mx[:fa_c_o]}b#{Mx[:fa_c]}" +Mx[:fa_italics_o]= "#{Mx[:fa_o]}i#{Mx[:fa_o_c]}"; Mx[:fa_italics_c]= "#{Mx[:fa_c_o]}i#{Mx[:fa_c]}" +Mx[:fa_underscore_o]= "#{Mx[:fa_o]}u#{Mx[:fa_o_c]}"; Mx[:fa_underscore_c]= "#{Mx[:fa_c_o]}u#{Mx[:fa_c]}" +Mx[:fa_cite_o]= "#{Mx[:fa_o]}cite#{Mx[:fa_o_c]}"; Mx[:fa_cite_c]= "#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}" +Mx[:fa_insert_o]= "#{Mx[:fa_o]}ins#{Mx[:fa_o_c]}"; Mx[:fa_insert_c]= "#{Mx[:fa_c_o]}ins#{Mx[:fa_c]}" +Mx[:fa_strike_o]= "#{Mx[:fa_o]}del#{Mx[:fa_o_c]}"; Mx[:fa_strike_c]= "#{Mx[:fa_c_o]}del#{Mx[:fa_c]}" +Mx[:fa_superscript_o]="#{Mx[:fa_o]}sup#{Mx[:fa_o_c]}"; Mx[:fa_superscript_c]="#{Mx[:fa_c_o]}sup#{Mx[:fa_c]}" +Mx[:fa_subscript_o]= "#{Mx[:fa_o]}sub#{Mx[:fa_o_c]}"; Mx[:fa_subscript_c]= "#{Mx[:fa_c_o]}sub#{Mx[:fa_c]}" +Mx[:fa_hilite_o]= "#{Mx[:fa_o]}hi#{Mx[:fa_o_c]}"; Mx[:fa_hilite_c]= "#{Mx[:fa_c_o]}hi#{Mx[:fa_c]}" +Mx[:gl_bullet]= "#{Mx[:gl_o]}●#{Mx[:gl_c]}" +#non substantive text sort: <-#> <~#> + Mx[:pa_non_object_dummy_heading]="#{Mx[:pa_o]}-##{Mx[:pa_c]}" #unnumbered paragraph, delete when not required [used in dummy headings, eg. for segmented html] (place marker at end of paragraph) + Mx[:pa_non_object_no_heading]="#{Mx[:pa_o]}~##{Mx[:pa_c]}" #unnumbered paragraph (place marker at end of paragraph) +Mx[:br_line]= "#{Mx[:mk_o]}br#{Mx[:mk_c]}" +Mx[:br_paragraph]= "#{Mx[:mk_o]}br#{Mx[:mk_c]}" +Mx[:br_nl]= "#{Mx[:mk_o]}nl#{Mx[:mk_c]}" +Mx[:br_page]= "#{Mx[:mk_o]}pb#{Mx[:mk_c]}" +Mx[:br_page_new]= "#{Mx[:mk_o]}pn#{Mx[:mk_c]}" +Mx[:br_endnotes]= "#{Mx[:mk_o]}ENDNOTES#{Mx[:mk_c]}" +Mx[:br_eof]= "#{Mx[:mk_o]}EOF#{Mx[:mk_c]}" +Mx[:lnk_o]='「'; Mx[:lnk_c]='」' +Mx[:sm_set_o]='《'; Mx[:sm_set_c]='》' +Mx[:sm_subset_o]='《 '; Mx[:sm_subset_c]='》' +#Mx[:sm_set_o]='∈ '; Mx[:sm_set_c]='∋ ' +#Mx[:sm_subset_o]='∈ '; Mx[:sm_subset_c]='∋ ' +Rx[:mx_fa_clean]= /#{Mx[:fa_o]}.+?#{Mx[:fa_c]}|#{Mx[:pa_o]}.+?#{Mx[:pa_c]}|#{Mx[:mk_o]}.+?#{Mx[:mk_c]}/ +Rx[:lv],Rx[:lv_1],Rx[:lv_2],Rx[:lv_3],Rx[:lv_4],Rx[:lv_5],Rx[:lv_6],Rx[:lv_7],Rx[:lv_8],Rx[:lv_9]= + /〔([1-9]):(\S*?)〕/,/#{Mx[:lv_o_1]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_2]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_3]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_4]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_5]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_6]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_7]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_8]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_9]}(\S*?)#{Mx[:lv_c]}/ +Rx[:meta]=/#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}/ +unless RUBY_VERSION < '1.9' + Mx[:meta_o].force_encoding('utf-8'); Mx[:meta_c].force_encoding('utf-8') + Mx[:lv_o_1].force_encoding('utf-8'); Mx[:lv_o_2].force_encoding('utf-8'); Mx[:lv_o_3].force_encoding('utf-8'); Mx[:lv_o_4].force_encoding('utf-8'); Mx[:lv_o_5].force_encoding('utf-8'); Mx[:lv_o_6].force_encoding('utf-8'); Mx[:lv_o_7].force_encoding('utf-8'); Mx[:lv_o_8].force_encoding('utf-8'); Mx[:lv_o_9].force_encoding('utf-8') + Mx[:lv_o].force_encoding('utf-8'); Mx[:lv_c].force_encoding('utf-8') + Mx[:gr_o].force_encoding('utf-8'); Mx[:gr_c].force_encoding('utf-8') + Mx[:id_o].force_encoding('utf-8'); Mx[:id_c].force_encoding('utf-8') + Mx[:tc_p].force_encoding('utf-8') + Mx[:tc_o].force_encoding('utf-8'); Mx[:tc_c].force_encoding('utf-8') + Mx[:pa_o].force_encoding('utf-8'); Mx[:pa_c].force_encoding('utf-8') + Mx[:mk_o].force_encoding('utf-8'); Mx[:mk_c].force_encoding('utf-8') + Mx[:gl_o].force_encoding('utf-8'); Mx[:gl_c].force_encoding('utf-8') + Mx[:fa_o].force_encoding('utf-8'); Mx[:fa_c].force_encoding('utf-8') + Mx[:fa_o_c].force_encoding('utf-8'); Mx[:fa_c_o].force_encoding('utf-8') + Mx[:fa_bold_o].force_encoding('utf-8'); Mx[:fa_bold_c].force_encoding('utf-8') + Mx[:fa_italics_o].force_encoding('utf-8'); Mx[:fa_italics_c].force_encoding('utf-8') + Mx[:fa_underscore_o].force_encoding('utf-8'); Mx[:fa_underscore_c].force_encoding('utf-8') + Mx[:fa_cite_o].force_encoding('utf-8'); Mx[:fa_cite_c].force_encoding('utf-8') + Mx[:fa_insert_o].force_encoding('utf-8'); Mx[:fa_insert_c].force_encoding('utf-8') + Mx[:fa_strike_o].force_encoding('utf-8'); Mx[:fa_strike_c].force_encoding('utf-8') + Mx[:fa_superscript_o].force_encoding('utf-8'); Mx[:fa_superscript_c].force_encoding('utf-8') + Mx[:fa_subscript_o].force_encoding('utf-8'); Mx[:fa_subscript_c].force_encoding('utf-8') + Mx[:fa_hilite_o].force_encoding('utf-8'); Mx[:fa_hilite_c].force_encoding('utf-8') + Mx[:gl_bullet].force_encoding('utf-8') + Mx[:pa_non_object_dummy_heading].force_encoding('utf-8'); Mx[:pa_non_object_no_heading].force_encoding('utf-8') + Mx[:br_line].force_encoding('utf-8'); Mx[:br_nl].force_encoding('utf-8') + Mx[:br_paragraph].force_encoding('utf-8') + Mx[:br_page].force_encoding('utf-8'); Mx[:br_page_new].force_encoding('utf-8') + Mx[:br_endnotes].force_encoding('utf-8') + Mx[:br_eof].force_encoding('utf-8') + Mx[:lnk_o].force_encoding('utf-8'); Mx[:lnk_c].force_encoding('utf-8') + Mx[:sm_set_o].force_encoding('utf-8'); Mx[:sm_set_c].force_encoding('utf-8') + Mx[:sm_subset_o].force_encoding('utf-8'); Mx[:sm_subset_c].force_encoding('utf-8') +# + Rx[:mx_fa_clean].force_encoding('utf-8') + Rx[:lv].force_encoding('utf-8');Rx[:lv_1].force_encoding('utf-8');Rx[:lv_2].force_encoding('utf-8');Rx[:lv_3].force_encoding('utf-8');Rx[:lv_4].force_encoding('utf-8');Rx[:lv_5].force_encoding('utf-8');Rx[:lv_6].force_encoding('utf-8');Rx[:lv_7].force_encoding('utf-8');Rx[:lv_8].force_encoding('utf-8');Rx[:lv_9].force_encoding('utf-8') + Rx[:meta].force_encoding('utf-8') +end __END__ +consider: + 〔comment〕 + 〔links?????〕 + import document? +check: + bold line + +┆┆⋮┇┊┋ +『』 +「」 +〔〕 +【】 + +· +¤ + #˝ " λ Ω β α π Ѫ Ж Я Ѳ ѳ Ф ㈣ + Ѳ ѳ Ф + ♩ ♭  ✠  ▭ ▬ ▪ +【】〖〗《》「」 + ‹ › ∗  +'〔lv1〕','〔lv2〕','〔lv3〕','〔lv4〕','〔lv5〕','〔lv6〕','〔lv7〕','〔lv8〕','〔lv9〕' +'〔 Ѳ1〕','〔 Ѳ2〕','〔 Ѳ3〕','〔 Ѳ4〕','〔 Ѳ5〕','〔Ѳ6〕','〔Ѳ7〕','〔Ѳ8〕','〔Ѳ9〕' diff --git a/lib/sisu/v0/css.rb b/lib/sisu/v0/css.rb index d1798a77..cdd05528 100644 --- a/lib/sisu/v0/css.rb +++ b/lib/sisu/v0/css.rb @@ -581,7 +581,9 @@ WOK color: #000077; } li.doc { - background: url(../image/bullet_doc.png) no-repeat 0 0; + background: url(../image/bullet_09.png) no-repeat 0px 6px; + /* background: url(../image/bullet_doc.png) no-repeat 0 0; */ + /* background: url(../../_sisu/image/bullet_09.png) no-repeat 0px 6px; */ padding-left: 16px; margin-left: 10px; margin-top: 0px; @@ -603,6 +605,7 @@ WOK text-align: left; } li.refcenter { + background: url(../image/bullet_09.png) no-repeat 0px 6px; /* background: url(../image/bullet_doc.png) no-repeat 0 0; */ background: none; padding-left: 20px; @@ -614,7 +617,8 @@ WOK text-align: center; } li.refbold { - background: url(../image/bullet_doc.png) no-repeat 0 0; + background: url(../image/bullet_09.png) no-repeat 0px 6px; + /* background: url(../image/bullet_doc.png) no-repeat 0 0; */ /* padding-left: 0px; */ padding-left: 16px; margin-left: 0; diff --git a/lib/sisu/v0/dal.rb b/lib/sisu/v0/dal.rb index 3e5e11ac..cb6d70f8 100644 --- a/lib/sisu/v0/dal.rb +++ b/lib/sisu/v0/dal.rb @@ -70,7 +70,7 @@ module SiSU_DAL include SiSU_Env include SiSU_Param include SiSU_Viz - include Syntax + include SiSU_Syntax class Instantiate < SiSU_Param::Parameters::Instructions def initialize @@flag_vocab=0 @@ -199,6 +199,7 @@ module SiSU_DAL data=data.join.split("\n\n") data=expand_insertions?(data) data=SiSU_document_structure::Code.new(@md,data).code + data=substitutions_and_insertions?(data) data_new=[] data.each do |x| data_new << if x =~ /\n\n/m; x.split(/\n\n+/) @@ -206,8 +207,7 @@ module SiSU_DAL end end data=data_new.flatten - data=substitutions_and_insertions?(data) - data=Syntax::Markup.new(@md,data).songsheet + data=SiSU_Syntax::Markup.new(@md,data).songsheet data=character_check(data) data=images(data) data=SiSU_document_structure::Tables.new(@md,data).tables @@ -234,12 +234,12 @@ module SiSU_DAL para.gsub!(/^([12])~\?\s+/,'\1~ ') #conditional header for incorporated document 2004w12 para.gsub!(/^[{~}]\s*$/,'') para.gsub!(/^#{@@comment}.*/,'') #remove comment and divider #% - para.gsub!(/<~#>|~#\s*/,'<~#>') - para.gsub!(/-#\s*/,'<-#><~#>') + para.gsub!(/<~#>|~#\s*/,"#{Mx[:fa_o]}~##{Mx[:fa_c]}") + para.gsub!(/-#\s*/,"#{Mx[:fa_o]}-##{Mx[:fa_c]}#{Mx[:fa_o]}~##{Mx[:fa_c]}") #para.gsub!(/(#\{{3} arch-tag:|0\{{3}~cvs)\s+/, "0{{~rcs ") #KEEP ... ENABLE WIDER USE OF REVISION CONTROL - para.gsub!(/(~\{ )\s+/,'\1') - para.gsub!(/ \/\//,'
') #added 2004w29 - para.gsub!(/
/,'
') #needed by xml, xhtml etc. + para.gsub!(/(#{Mx[:en_a_o]} )\s+/,'\1'); para.gsub!(/(~\{ )\s+/,'\1') + para.gsub!(/ \/\//,"#{Mx[:br_line]}") #added 2004w29 + para.gsub!(/
/,"#{Mx[:br_line]}") #needed by xml, xhtml etc. #para.gsub!(/

/,'

') #consider para.gsub!(/`/,"'") para.gsub!(/\t/,' ') @@ -252,12 +252,12 @@ module SiSU_DAL para.gsub!(/\\trademark\b|\\tm\b/,'®') #non_utf8(para) para=para + "\n" - unless para =~/^<:code>/ + unless para =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ case para when /\^~/ # endnotes #% Note must do this first (earlier loop) and then enter gathered data into ~^\d+ sub_para=para.dup - @@endnote_array << sub_para.gsub!(/\n/,'').gsub!(/\^~\s+(.+)\s*/, %{~\{#{endnote_no} \\1 \}~}).strip + @@endnote_array << sub_para.gsub!(/\n/,'').gsub!(/\^~\s+(.+)\s*/, %{#{Mx[:en_a_o]}#{endnote_no} \\1 #{Mx[:en_a_c]}}).strip endnote_no+=1 para=nil if para =~/\^~ .+/ #removes 'binary' endnote now in endnote array for later insertion end @@ -383,7 +383,7 @@ module SiSU_DAL manifest="#{pre}{#{txt} }#{@u.remote}/#{url_dir}/toc.html#{note}\n\n" else puts "error, does currently support relative paths (reltive paths were removed, as had problems for citation, and was not suited to all output types should possibly reconsider) #{__FILE__} #{__LINE__}" - if para =~/\{(?:~\^\s+)?(.+?)\s\[(\d[sS]*)\]\}\.\.\/(\S+?)\/(\s+~\{.+?\}~)?/ + if para =~/\{(?:~\^\s+)?(.+?)\s\[(\d[sS]*)\]\}\.\.\/(\S+?)\/(\s+#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]})?/ txt,cmd,url_dir,note=$1,$2,$3,$4 manifest="{ #{txt} }../#{url_dir}/toc.html#{note}\n\n" end @@ -452,7 +452,7 @@ module SiSU_DAL tuned_file end def substitutions_and_insertions?(data) - tuned_file=[] + data_expand=[] if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content preceeds it) data[0].gsub!(/^#!\s*\/usr\/bin\/sisu/,'') data[0].gsub!(/^#!\s*\/usr\/bin\/env sisu/,'') @@ -462,16 +462,6 @@ module SiSU_DAL data[0].gsub!(/^(sisu-[\d.]+)$/,'% \1') end data.each do |para| - para=if @md.markup_version.to_f >= 0.38 - SiSU_document_structure::Structure.new(@md,para).structure_markup_normalize - else para - end - #para.gsub!(//,'\1') #consider, would permit use of text hyperlinks if desired, dal_syntax more appropriate? - para.gsub!(/^((?:[1-9]|:?[A-C])~\S*)\s*$/,'\1~ [Note: heading marker::required title missing]~#') #conditional header for incorporated document 2004w12 - if para =~/^@\S+?:/ - para.gsub!(/^@(\S+?):\s+/,'0~\1 ') - para.gsub!(/^@(\S+?):([+-])\s+/,'0~\1\2 ') - end if para =~/<:insert\d+!?>/ \ and para !~/^%\s+/ @skin.select @@ -501,13 +491,25 @@ module SiSU_DAL para=[] ins.insert7.split(/\n\n/).each{|x| para << x << "\n"} end - para.each{|x| tuned_file << x } - else tuned_file << para + para.each{|x| data_expand << x } + else data_expand << para + end + data_expand.flatten! + data_expand.compact! + end + data_expand.each do |para| + para=if @md.markup_version.to_f >= 0.38 + SiSU_document_structure::Structure.new(@md,para).structure_markup_normalize + else + SiSU_document_structure::Structure.new(@md,para).structure_marks + end + #para.gsub!(//,'\1') #consider, would permit use of text hyperlinks if desired, dal_syntax more appropriate? + para.gsub!(/^((?:[1-9]|:?[A-C])~\S*)\s*$/,'\1~ [Note: heading marker::required title missing]~#') #conditional header for incorporated document 2004w12 + if para =~/^@\S+?:/ + para.gsub!(/^@(\S+?):\s+/,"#{Mx[:meta_o]}\\1#{Mx[:meta_c]}") + para.gsub!(/^@(\S+?):([+-])\s+/,"#{Mx[:meta_o]}\\1\\2#{Mx[:meta_c]}") end - tuned_file.flatten! - tuned_file.compact! end - tuned_file end def numbering_song(data) data=number_plaintext_para(data) @@ -524,8 +526,10 @@ module SiSU_DAL def number_plaintext_para(data) @tuned_file=[] data.each do |para| - para.gsub!(/(^|[^<][^v][^>])\n/,'\1 ') #messy, but idea is that tables should retain breaks - para.gsub!(/^/,"\n") unless para =~/¡/u + if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ + para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks + end + para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u para.gsub!(/^\s+|\s$/,"\n") @tuned_file << para end @@ -557,27 +561,27 @@ module SiSU_DAL # debug 2003w46 adding revision control info if @md.flag_auto_endnotes \ and @md.flag_separate_endnotes_make - @tuned_file << "\n4~endnotes Endnotes <-#> <~0;0:0;u0>" + @tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}" end - @tuned_file << "\n" + @tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON @tuned_file=@tuned_file.flatten end def owner_details_seg - data << '4~owner.details Owner Details' + data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details" end def number_sub_heading(para,num,title_no) case para - when /#{num}~- /; para.gsub!(/#{num}~- /,"#{title_no} ") - when /^#{num}~#\s*/; para.gsub!(/^#{num}~#\s*/,"#{title_no} ") - when /^#{num}~[a-z_\.]+ / - para.gsub!(/^#{num}~([a-z_\.]+)\s+(.+)/i,%{#{num}~\\1 #{title_no} \\2 <:name##{title_no}>}) - when /^#{num}~\s+#{title_no}/ - para.gsub!(/^#{num}~ /,"#{num}~#{title_no} ") #where title contains title number - else para.gsub!(/^#{num}~ /,"#{num}~#{title_no} #{title_no} ") #main, where title number is to be provided + when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/; para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ") + when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ") + when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/ + para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) + when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/ + para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}") #where title contains title number + else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, where title number is to be provided end if @md.toc_lev_limit \ and @md.toc_lev_limit < num - para.gsub!(/^[5-8]~(?:~\S+)?\s*/,'!_ ') + para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #bold line, watch end para end @@ -596,61 +600,61 @@ module SiSU_DAL if (@md.markup =~/num_top/ \ or (@md.num_top \ and @md.num_top !~/^$/)) \ - and para !~/^0~/ + and para !~/^#{Rx[:meta]}/ if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \ - and para !~/^4~endnotes?/) + and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/) t_not+=1 #; t_no2=0; t_no3=0 - para.gsub!(/^(#{no1})~#\s*/,"\\1~ps#{t_not} ") - para.gsub!(/^(#{no2})~#\s*/,"\\1~ps#{t_not} ") - para.gsub!(/^(#{no3})~#\s*/,"\\1~ps#{t_not} ") - para.gsub!(/^(#{no4})~#\s*/,"\\1~ps#{t_not} ") + para.gsub!(/^(#{Mx[:lv_o]}#{no1}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") + para.gsub!(/^(#{Mx[:lv_o]}#{no2}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") + para.gsub!(/^(#{Mx[:lv_o]}#{no3}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") + para.gsub!(/^(#{Mx[:lv_o]}#{no4}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") end - if para =~/#{no1}~/ + if para =~/#{Mx[:lv_o]}#{no1}:/ @subnumber=1 - @subnumber=0 if para =~/#{no1}~/ + @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/ end - if para =~/^[0-6]~[ \w-]/ \ - and para !~ /(?:[0-6]~[\w-]+-|4~endnotes|^[0-6]~([a-z_\.]+)\s+[\d.]+)\s/ \ - and para !~/<~#>|<-#>/ - if para =~/^#{no1}~/ + if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \ + and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s+[\d.]+)\s/ \ + and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ + if para =~/^#{Mx[:lv_o]}#{no1}:/ t_no1+=1; t_no2=0; t_no3=0 title_no="#{t_no1}" if not @md.seg_names.nil? \ and not @md.seg_names.include?(title_no) - para.gsub!(/^#{no1}~\s+(\S+)#/,"#{no1}~#{title_no} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329) + para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329) para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. }) - unless para =~/^#{no1}~\s+[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review - para.gsub!(/^#{no1}~\s+/,"#{no1}~#{title_no} #{title_no}. ") + unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review + para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ") end @md.seg_names << title_no #else puts "warning segment name #{title_no} already exists" end - unless para =~/^#{no1}~([a-z_\.]+)\s+[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required - para.gsub!(/^#{no1}~([a-z_\.]+)\s+(.+)/i,%{#{no1}~\\1 #{title_no}. \\2 <:name##{title_no}>}) + unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required + para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i, + %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) end + para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ") end - if para =~/^#{no2}~/ + if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/ t_no2+=1; t_no3=0 title_no="#{t_no1}.#{t_no2}" para=number_sub_heading(para,no2,title_no) end - if para =~/^#{no3}~/ + if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/ t_no3+=1 title_no="#{t_no1}.#{t_no2}.#{t_no3}" para=number_sub_heading(para,no3,title_no) end - elsif para =~ /^[0-6]~[\w-]+-/ # endnotes, watch2005 - para.gsub!(/^#{no1}~([a-z_\.]+)- /,"#{no1}~\\1 ") - para.gsub!(/^#{no2}~([a-z_\.]+)- /,"#{no2}~\\1 ") - para.gsub!(/^#{no3}~([a-z_\.]+)- /,"#{no3}~\\1 ") + elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005 + para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ") + para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}") + para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}") end elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4 - unless para =~ /^[0-6]~\S+/ #endnotes watch? - if para =~/^[1-6]~\s+([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d - name_num=$1 - para.gsub!(/^([1-6]~)\s+/,"\\1#{name_num} ") - end + if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d + name_num=$1 + para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}") end if @md.toc_lev_limit end @@ -663,7 +667,7 @@ module SiSU_DAL @tuned_file=[] object_array=SiSU_document_structure::OCN.new(@md,data).ocn object_array.each do |o| - @tuned_file <<= if o.ocn; "#{o.txt} <~#{o.ocn};#{o.lv};#{o.type}>" #main ocn descriptor + @tuned_file <<= if o.ocn; "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor else o.txt end end @@ -675,8 +679,8 @@ module SiSU_DAL letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) data.each do |para| if para =~/\w|\S|<|\(/ - if para !~/^%% |^0~|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^<:p[bn]>|^<:\#|<:- |<[:!]!4|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|||||<\/tr>|


|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|/i #ocn here #  added with Tune.code #¡ - if para=~/^[1-8]~/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) + if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|||||<\/tr>|
|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here #  added with Tune.code #¡ + if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) end if para =~/^#[ 1]/ letter_small=0 @@ -685,7 +689,7 @@ module SiSU_DAL para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004 end if para =~/^_# / - para.gsub!(/^_# /,"<:i1> #{letter[letter_small]}. ") #change 2004 + para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004 letter_small+=1 end end @@ -717,53 +721,51 @@ module SiSU_DAL end data.each do |para| para=SiSU_document_structure::Structure.new(@md,para).structure_markup - if para =~/^[456]~ / - if para=~/^4/ \ + if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/ + if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \ and not @md.set_heading_seg @md.set_heading_seg=true end - if para =~/^[456]~(?:\s\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name + if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name pattern=$1 pattern.gsub!(/(?:[:,-]|\W)/,'.') pattern.gsub!(/\.$/,'') if not @md.seg_names.nil? \ and not @md.seg_names.include?(pattern) - para.gsub!(/^([456])~\s*/,"\\1~#{pattern} ") + para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}") @md.seg_names << pattern else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/ end end - if para =~/^4~\s.+?;4:(\d+);/m #extract segment name from embedded document structure info + if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info pattern=$1 pattern.gsub!(/(?:[:,-]|\W)/,'.') pattern.gsub!(/\.$/,'') if not @md.seg_names.nil? \ and not @md.seg_names.include?(pattern) - para.gsub!(/^(4)~\s*/,"\\1~#{pattern} ") + para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}") @md.seg_names << pattern else - para.gsub!(/^(4)~\s*/,"\\1~~#{pattern} ") + para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}") @md.seg_names << "~#{pattern}" end end - if para =~/^4~\s+/ #if still not segment name, provide a numerical one + if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one if not @md.seg_names.nil? \ and not @md.seg_names.include?(art_filename_auto) - para.gsub!(/^4~\s+/,%{4~_#{art_filename_auto} }) + para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}}) @md.seg_names << art_filename_auto else puts 'segment name (numbering) error' end art_filename_auto+=1 end end - @tuned_file << if para =~/^([1-6])~/m \ - and (@md.pagenew \ - or @md.pagebreak) + @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \ + and (@md.pagenew or @md.pagebreak) m=$1 #watch ref~ para_tmp=[] - if @md.pagenew.inspect =~/#{m}/; para_tmp << "<:pn>\n" << para - end - if @md.pagebreak.inspect =~/#{m}/; para_tmp << "<:pb>\n" << para + if @md.pagenew.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page_new]}\n" << para + elsif @md.pagebreak.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page]}\n" << para end para_result=unless para_tmp.length > 0; para else para_tmp @@ -782,11 +784,11 @@ module SiSU_DAL @tuned_file=[] data.each do |para| unless @md.set_heading_top - if para !~/^(?:@\S+:|0~\S+)\s/m \ + if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \ and para !~/\A\s*\Z/m @md.set_heading_top=true - head=if @md.title ; "1~ #{@md.title}" - else '1~ [no title provided]' + head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}" + else "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]" end @tuned_file << head end @@ -802,12 +804,12 @@ module SiSU_DAL @tuned_file=[] data.each do |para| unless @md.set_heading_seg - if para !~/^(?:@\S+:|0~\S+|[123]~)/m \ + if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \ and para !~/\A\s*\Z/m \ - and para !~/<:p[bn]>/ + and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/ @md.set_heading_seg=true - head=if @md.title ; "4~seg [#{@md.title}]" - else '4~seg [segment]' + head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]" + else "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]" end @tuned_file << head end @@ -825,7 +827,7 @@ module SiSU_DAL unless @md.set_header_title if para !~/^%{1,2}\s/m \ and para !~/\A\s*\Z/m - @tuned_file << "0~title #{@md.heading_seg_first}" + @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}" @md.title=@md.heading_seg_first @md.set_header_title=true end @@ -842,23 +844,23 @@ module SiSU_DAL data.each do |para| # manually numbered endnotes --> if @md.mod.inspect =~/--no-asterisk|--no-annotate/ - para.gsub!(/~\[[*]\s.+?\]~/,'') + para.gsub!(/#{Mx[:en_b_o]}\s.+?#{Mx[:en_b_c]}/,'') end if @md.mod.inspect =~/--no-dagger|--no-annotate/ - para.gsub!(/~\[[+]\s.+?\]~/,'') + para.gsub!(/#{Mx[:en_b_o]}[+]\s.+?#{Mx[:en_b_c]}/,'') end - unless para =~/^<:code>/ + unless para =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ case para # auto-numbered endnotes --> - when /~\{\s+.+?\}~|~\[[*+]\s+.+?\]~/ - para.gsub!(/\s*(\}~|\]~)/,' \1') # required 2003w31 - word_mode=para.scan(/<:group>\n|\n<:group-end>|\S+/m) + when /#{Mx[:en_a_o]}\s+.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}[*+]\s+.+?#{Mx[:en_b_c]}/ + para.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/,' \1') # required 2003w31 + word_mode=para.scan(/#{Mx[:gr_o]}group#{Mx[:gr_c]}\n|\n#{Mx[:gr_o]}group-end#{Mx[:gr_c]}|\S+/m) word_mode=endnote_call_number(word_mode) para=word_mode.join(' ') endnote_ref+=1 when /~\^(?:\s|$)|<:e>/ #%Note inserts endnotes previously gathered from /^(|[-~]\{{3})/ (in earlier loop) - word_mode=para.scan(/<:group>\n|\n<:group-end>|\S+/m) + word_mode=para.scan(/#{Mx[:gr_o]}group#{Mx[:gr_c]}\n|\n#{Mx[:gr_o]}group-end#{Mx[:gr_c]}|\S+/m) word_mode=endnote_call_number(word_mode) para=word_mode.join(' ') endnote_ref+=1 @@ -870,19 +872,19 @@ module SiSU_DAL end def endnote_call_number(data) data.each do |word| - unless data =~/^<:code>/ + unless data =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ case word - when /~\{/ - unless word =~/~\{[*+]+/ - word.gsub!(/~\{/,"~\{#{@@endnote_counter} ") + when /#{Mx[:en_a_o]}/ + unless word =~/#{Mx[:en_a_o]}[*+]+/ + word.gsub!(/#{Mx[:en_a_o]}/,"#{Mx[:en_a_o]}#{@@endnote_counter} ") @@endnote_counter+=1 end - when /~\[/ - if word =~/~\[[+]/ - word.gsub!(/~\[[+]/,"~\[\+#{@@endnote_counter_dag} ") + when /#{Mx[:en_b_o]}/ + if word =~/#{Mx[:en_b_o]}[+]/ + word.gsub!(/#{Mx[:en_b_o]}[+]/,"#{Mx[:en_b_o]}\+#{@@endnote_counter_dag} ") @@endnote_counter_dag+=1 else - word.gsub!(/~\[[*]?/,"~\[\*#{@@endnote_counter_asterisk} ") + word.gsub!(/#{Mx[:en_b_o]}[*]?/,"#{Mx[:en_b_o]}\*#{@@endnote_counter_asterisk} ") @@endnote_counter_asterisk+=1 end when /~\^|<:e>/ @@ -898,106 +900,105 @@ module SiSU_DAL base_html="#{dir.url.root}/#{@md.fnb}" ocnm=ocnd=ocnv=0 ocnm+=1 - header0='<:pn>' - header1="\n1~meta Document Information (metadata) <~0;0:0;m#{ocnm}>" + header1="\n#{Mx[:lv_o_1]}meta#{Mx[:lv_c]}Document Information (metadata) #{Mx[:id_o]}~0;0:0;m#{ocnm}#{Mx[:id_c]}" ocnm+=1 - header4="\n4~metadata Metadata <~0;m#{ocnm};m#{ocnm}>" + header4="\n#{Mx[:lv_o_4]}metadata#{Mx[:lv_c]}Metadata #{Mx[:id_o]}~0;m#{ocnm};m#{ocnm}#{Mx[:id_c]}" ocnm+=1; ocnd+=1 - head_no_dc="<~0;m#{ocnm};d#{ocnd}>" + head_no_dc="#{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" ocnm+=1; ocnd+=1 - head_no_dc_tag="<~0;m#{ocnm};d#{ocnd}>" + head_no_dc_tag="#{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" data.each do |para| case para - when /^0~(title|creator|author|translator|translated_by|illustrator|illustrated_by|prepared_by|digitized_by|description|publisher|contributor|date\.created|date\.issued|date\.available|date\.valid|date\.modified|date|type|format|rights|identifier|source|language)/i + when /^#{Mx[:meta_o]}(title|creator|author|translator|translated_by|illustrator|illustrated_by|prepared_by|digitized_by|description|publisher|contributor|date\.created|date\.issued|date\.available|date\.valid|date\.modified|date|type|format|rights|identifier|source|language)#{Mx[:meta_c]}/i m=$1 ocnm+=1; ocnd+=1 @dc << case para - when /^0~title/ - "\n#{@tr.dc_title}: #{@md.dc_title} <~0;m#{ocnm};d#{ocnd}>" - when /^0~(?:creator|author)/ - "\n#{@tr.creator}: #{@md.dc_creator} <~0;m#{ocnm};d#{ocnd}>" - when /0~(?:translator|translated_by)/ - "\n#{@tr.translator}: #{@md.translator} <~0;m#{ocnm};d#{ocnd}>" - when /^0~(?:illustrator|illustrated_by)/ - "\n#{@tr.illustrator}: #{@md.illustrator} <~0;m#{ocnm};d#{ocnd}>" - when /^0~prepared_by/ - "\n#{@tr.prepared_by}: #{@md.prepared_by} <~0;m#{ocnm};d#{ocnd}>" - when /^0~digitized_by/ - "\n#{@tr.digitized_by}: #{@md.digitized_by} <~0;m#{ocnm};d#{ocnd}>" - when /^0~description/ - "\n#{@tr.description}: #{@md.dc_description} <~0;m#{ocnm};d#{ocnd}>" - when /^0~subject/ - "\n#{@tr.subject}: #{@md.dc_subject} <~0;m#{ocnm};d#{ocnd}>" - when /^0~abstract/ - "\n#{@tr.abstract}: #{@md.dc_abstract} <~0;m#{ocnm};d#{ocnd}>" - when /^0~publisher/ - "\n#{@tr.publisher}: #{@md.dc_publisher} <~0;m#{ocnm};d#{ocnd}>" - when /^0~contributor/ - "\n#{@tr.contributor}: #{@md.dc_contributor} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date.created/ - "\n#{@tr.date_created}: #{@md.dc_date_created} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date.issued/ - "\n#{@tr.date_issued}: #{@md.dc_date_issued} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date.available/ - "\n#{@tr.date_available}: #{@md.dc_date_available} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date.modified/ - "\n#{@tr.date_modified}: #{@md.dc_date_modified} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date.valid/ - "\n#{@tr.date_valid}: #{@md.dc_date_valid} <~0;m#{ocnm};d#{ocnd}>" - when /^0~date/ - "\n#{@tr.date}: #{@md.dc_date} <~0;m#{ocnm};d#{ocnd}>" - when /^0~type/ - "\n#{@tr.type}: #{@md.dc_type} <~0;m#{ocnm};d#{ocnd}>" - when /^0~format/ - "\n#{@tr.format}: #{@md.dc_format} <~0;m#{ocnm};d#{ocnd}>" - when /^0~rights/ - "\n#{@tr.rights}: #{@md.dc_rights} <~0;m#{ocnm};d#{ocnd}>" - when /^0~identifier/ - "\n#{@tr.identifier}: #{@md.dc_identifier} <~0;m#{ocnm};d#{ocnd}>" - when /^0~source/ - "\n#{@tr.source}: #{@md.dc_source} <~0;m#{ocnm};d#{ocnd}>" - when /^0~language/ - "\n#{@tr.language}: #{@md.dc_language} <~0;m#{ocnm};d#{ocnd}>" - when /^0~language.original/ - "\n#{@tr.language_original}: #{@md.language_original} <~0;m#{ocnm};d#{ocnd}>" - when /^0~relation/ - "\n#{@tr.relation}: #{@md.dc_relation} <~0;m#{ocnm};d#{ocnd}>" - when /^0~coverage/ - "\n#{@tr.coverage}: #{@md.dc_coverage} <~0;m#{ocnm};d#{ocnd}>" - when /^0~keywords/ - "\n#{@tr.keywords}: #{@md.keywords} <~0;m#{ocnm};d#{ocnd}>" - when /^0~comments/ - "\n#{@tr.comments}: #{@md.comments} <~0;m#{ocnm};d#{ocnd}>" - when /^0~cls_loc/ - "\n#{@cls_dewey}: #{@md.cls_dewey} <~0;m#{ocnm};d#{ocnd}>" - when /^0~cls_dewey/ - "\n#{@tr.cls_dewey}: #{@md.cls_dewey} <~0;m#{ocnm};d#{ocnd}>" - when /^0~cls_gutenberg|0~cls_pg/ - "\n#{@tr.cls_gutenberg}: #{@md.cls_gutenberg} <~0;m#{ocnm};d#{ocnd}>" - #"\n#{@tr.cls_gutenberg}: #{@md.cls_pg} <~0;m#{ocnm};d#{ocnd}>" - when /^0~cls_isbn/ - "\n#{@tr.cls_isbn}: #{@md.cls_isbn} <~0;m#{ocnm};d#{ocnd}>" - when /^0~prefix(?:_a)?/ - "\n#{@tr.prefix_a}: #{@md.prefix_a} <~0;m#{ocnm};d#{ocnd}>" - when /^0~prefix_b/ - "\n#{@tr.prefix_b}: #{@md.prefix_b} <~0;m#{ocnm};d#{ocnd}>" - else para.gsub(/^0~(#{m})\s+(.+)/m,"\n#{m.capitalize}: \\2 <~0;m#{ocnm};d#{ocnd}>") + when /^#{Mx[:meta_o]}title#{Mx[:meta_c]}/ + "\n#{@tr.dc_title}: #{Mx[:fa_underscore_o]}#{@md.dc_title}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}(?:creator|author)#{Mx[:meta_c]}/ + "\n#{@tr.creator}: #{Mx[:fa_underscore_o]}#{@md.dc_creator}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /#{Mx[:meta_o]}(?:translator|translated_by)#{Mx[:meta_c]}/ + "\n#{@tr.translator}: #{Mx[:fa_underscore_o]}#{@md.translator}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}(?:illustrator|illustrated_by)#{Mx[:meta_c]}/ + "\n#{@tr.illustrator}: #{Mx[:fa_underscore_o]}#{@md.illustrator}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}prepared_by#{Mx[:meta_c]}/ + "\n#{@tr.prepared_by}: #{Mx[:fa_underscore_o]}#{@md.prepared_by}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}digitized_by#{Mx[:meta_c]}/ + "\n#{@tr.digitized_by}: #{Mx[:fa_underscore_o]}#{@md.digitized_by}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}description#{Mx[:meta_c]}/ + "\n#{@tr.description}: #{Mx[:fa_underscore_o]}#{@md.dc_description}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}subject#{Mx[:meta_c]}/ + "\n#{@tr.subject}: #{Mx[:fa_underscore_o]}#{@md.dc_subject}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}abstract#{Mx[:meta_c]}/ + "\n#{@tr.abstract}: #{Mx[:fa_underscore_o]}#{@md.dc_abstract}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}publisher#{Mx[:meta_c]}/ + "\n#{@tr.publisher}: #{Mx[:fa_underscore_o]}#{@md.dc_publisher}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}contributor#{Mx[:meta_c]}/ + "\n#{@tr.contributor}: #{Mx[:fa_underscore_o]}#{@md.dc_contributor}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}date.created#{Mx[:meta_c]}/ + "\n#{@tr.date_created}: #{Mx[:fa_underscore_o]}#{@md.dc_date_created}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}date.issued#{Mx[:meta_c]}/ + "\n#{@tr.date_issued}: #{Mx[:fa_underscore_o]}#{@md.dc_date_issued}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}date.available#{Mx[:meta_c]}/ + "\n#{@tr.date_available}: #{Mx[:fa_underscore_o]}#{@md.dc_date_available}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}date.modified#{Mx[:meta_c]}/ + "\n#{@tr.date_modified}: #{Mx[:fa_underscore_o]}#{@md.dc_date_modified}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}date.valid#{Mx[:meta_c]}/ + "\n#{@tr.date_valid}: #{Mx[:fa_underscore_o]}#{@md.dc_date_valid}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}date#{Mx[:meta_c]}/ + "\n#{@tr.date}: #{Mx[:fa_underscore_o]}#{@md.dc_date}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}type#{Mx[:meta_c]}/ + "\n#{@tr.type}: #{Mx[:fa_underscore_o]}#{@md.dc_type}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}format#{Mx[:meta_c]}/ + "\n#{@tr.format}: #{Mx[:fa_underscore_o]}#{@md.dc_format}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}rights#{Mx[:meta_c]}/ + "\n#{@tr.rights}: #{Mx[:fa_underscore_o]}#{@md.dc_rights}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}identifier#{Mx[:meta_c]}/ + "\n#{@tr.identifier}: #{Mx[:fa_underscore_o]}#{@md.dc_identifier}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}source#{Mx[:meta_c]}/ + "\n#{@tr.source}: #{Mx[:fa_underscore_o]}#{@md.dc_source}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}language#{Mx[:meta_c]}/ + "\n#{@tr.language}: #{Mx[:fa_underscore_o]}#{@md.dc_language}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}language.original#{Mx[:meta_c]}/ + "\n#{@tr.language_original}: #{Mx[:fa_underscore_o]}#{@md.language_original}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}relation#{Mx[:meta_c]}/ + "\n#{@tr.relation}: #{Mx[:fa_underscore_o]}#{@md.dc_relation}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}coverage#{Mx[:meta_c]}/ + "\n#{@tr.coverage}: #{Mx[:fa_underscore_o]}#{@md.dc_coverage}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}keywords#{Mx[:meta_c]}/ + "\n#{@tr.keywords}: #{Mx[:fa_underscore_o]}#{@md.keywords}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}comments#{Mx[:meta_c]}/ + "\n#{@tr.comments}: #{Mx[:fa_underscore_o]}#{@md.comments}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}cls_loc#{Mx[:meta_c]}/ + "\n#{@cls_dewey}: #{Mx[:fa_underscore_o]}#{@md.cls_dewey}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}cls_dewey#{Mx[:meta_c]}/ + "\n#{@tr.cls_dewey}: #{Mx[:fa_underscore_o]}#{@md.cls_dewey}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}(?:cls_gutenberg|cls_pg)#{Mx[:meta_c]}/ + "\n#{@tr.cls_gutenberg}: #{Mx[:fa_underscore_o]}#{@md.cls_gutenberg}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + #"\n#{@tr.cls_gutenberg}: #{@md.cls_pg} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}cls_isbn#{Mx[:meta_c]}/ + "\n#{@tr.cls_isbn}: #{Mx[:fa_underscore_o]}#{@md.cls_isbn}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}prefix(?:_a)?#{Mx[:meta_c]}/ + "\n#{@tr.prefix_a}: #{Mx[:fa_underscore_o]}#{@md.prefix_a}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + when /^#{Mx[:meta_o]}prefix_b#{Mx[:meta_c]}/ + "\n#{@tr.prefix_b}: #{Mx[:fa_underscore_o]}#{@md.prefix_b}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" + else para.gsub(/^#{Mx[:meta_o]}(#{m})\s+(.+)/m,"\n#{m.capitalize}: #{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}") end end end ocnm+=1; ocnv+=1 - head_no_rc="<~0;m#{ocnm};v#{ocnv}>" + head_no_rc="#{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" ocnm+=1; ocnv+=1 - head_no_rc_tag="<~0;m#{ocnm};v#{ocnv}>" + head_no_rc_tag="#{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" data.each do |para| case para when /^0~(?:cvs|rcs)\+\s+/ #note the + sign to turn on use of cvs id ocnm+=1; ocnv+=1 - @cvs << "#{@tr.sc_number}: #{@md.sc_number} <~0;m#{ocnm};v#{ocnv}>" + @cvs << "#{@tr.sc_number}: #{Mx[:fa_underscore_o]}#{@md.sc_number}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" ocnm+=1; ocnv+=1 - @cvs << "#{@tr.sc_date}: #{@md.sc_date} <~0;m#{ocnm};v#{ocnv}>" + @cvs << "#{@tr.sc_date}: #{Mx[:fa_underscore_o]}#{@md.sc_date}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" ocnm+=1; ocnv+=1 - @cvs << "CVS/RCS time: #{@md.sc_time} <~0;m#{ocnm};v#{ocnv}>" + @cvs << "CVS/RCS time: #{Mx[:fa_underscore_o]}#{@md.sc_time}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" ocnm+=1; ocnv+=1 when /^0~cvs[+\s]/ #enable pattern above instead if you wish the default to be to include cvs tags from all documents KEEP when /^0~cvs\s+/ #enable pattern above instead if you wish the default to be to include cvs tags from all documents KEEP @@ -1007,47 +1008,45 @@ module SiSU_DAL ocnm+=1; ocnv+=1 if @md.sc_filename \ and @md.sc_filename.length > 3 - @rc << "#{@tr.sourcefile}: #{@md.sc_filename} <~0;m#{ocnm};v#{ocnv}>" - else @rc << "#{@tr.sourcefile}: #{@md.fns} <~0;m#{ocnm};v#{ocnv}>" + @rc << "#{@tr.sourcefile}: #{Mx[:fa_underscore_o]}#{@md.sc_filename}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" + else @rc << "#{@tr.sourcefile}: #{Mx[:fa_underscore_o]}#{@md.fns}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" end ocnm+=1; ocnv+=1 if @md.file_encoding \ and @md.file_encoding.length > 3 #translate - @rc << "Filetype: #{@md.file_encoding} <~0;m#{ocnm};v#{ocnv}>" + @rc << "Filetype: #{Mx[:fa_underscore_o]}#{@md.file_encoding}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" end ocnm+=1; ocnv+=1 if @md.dgst #change. enable by default - @rc << "#{@tr.sourcefile_digest}, #{@md.dgst[0]} #{@md.dgst[1]} <~0;m#{ocnm};v#{ocnv}>" + @rc << "#{@tr.sourcefile_digest}, #{@md.dgst[0]} #{Mx[:fa_underscore_o]}#{@md.dgst[1]}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" ocnm+=1; ocnv+=1 end if @md.dgst_skin #change. enable by default - @rc << "Skin_Digest: #{@md.dgst_skin[0]} #{@md.dgst_skin[1]} <~0;m#{ocnm};v#{ocnv}>" + @rc << "Skin_Digest: #{@md.dgst_skin[0]} #{Mx[:fa_underscore_o]}#{@md.dgst_skin[1]}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" ocnm+=1; ocnv+=1 end - @rc << "Generated #{head_no_rc}" if @rc.length > 0 - @rc << "#{@tr.last_generated}: #{Time.now} <~0;m#{ocnm};v#{ocnv}>" + @rc << "#{Mx[:fa_bold_o]}Generated#{Mx[:fa_bold_c]} #{head_no_rc}" if @rc.length > 0 + @rc << "#{@tr.last_generated}: #{Mx[:fa_underscore_o]}#{Time.now}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" ocnm+=1; ocnv+=1 if @md.sisu_version[:version] - @rc << "#{@tr.sisu_version}: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]}) <~0;m#{ocnm};v#{ocnv}>" + @rc << "#{@tr.sisu_version}: #{Mx[:fa_underscore_o]}#{@md.sisu_version[:project]}#{Mx[:fa_underscore_c]} #{Mx[:fa_underscore_o]}#{@md.sisu_version[:version]}#{Mx[:fa_underscore_c]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]}) #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" ocnm+=1; ocnv+=1 end - @rc << "#{@tr.ruby_version}: #{@md.ruby_version} <~0;m#{ocnm};v#{ocnv}>" + @rc << "#{@tr.ruby_version}: #{Mx[:fa_underscore_o]} #{@md.ruby_version}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};v#{ocnv}#{Mx[:id_c]}" end - meta << header0 - meta << header1 - meta << header4 - meta << "Document Manifest @\n #{base_html}/#{@md.fn[:manifest]} <~0;m#{ocnm};m#{ocnm}>" - meta << "Dublin Core (DC) #{head_no_dc}" if @dc.length > 0 - meta << "DC tags included with this document are provided here. #{head_no_dc_tag}" if @dc.length > 0 + meta << Mx[:br_page_new] << header1 << header4 + meta << "Document Manifest @\n #{base_html}/#{@md.fn[:manifest]} #{Mx[:id_o]}~0;m#{ocnm};m#{ocnm}#{Mx[:id_c]}" + meta << "#{Mx[:fa_bold_o]}Dublin Core#{Mx[:fa_bold_c]} (DC) #{head_no_dc}" if @dc.length > 0 + meta << "#{Mx[:fa_italics_o]}DC tags included with this document are provided here.#{Mx[:fa_italics_c]} #{head_no_dc_tag}" if @dc.length > 0 @dc.each { |x| meta << x } - meta << "Version Information #{head_no_rc}" if @rc.length > 0 + meta << "#{Mx[:fa_bold_o]}Version Information#{Mx[:fa_bold_c]} #{head_no_rc}" if @rc.length > 0 if @cvs.length > 0 - meta << "Note the version information provided here, is specific to the host site. #{head_no_rc_tag}" + meta << "#{Mx[:fa_italics_o]}Note the version information provided here, is specific to the host site.#{Mx[:fa_italics_c]} #{head_no_rc_tag}" @cvs.each { |x| meta << x } end @rc.each { |x| meta << x } ## ENDNOTE RELATED endnote related - meta << "\n" + meta << "\n#{Mx[:br_eof]}" meta=object_digest(meta) end def stamped(para,hash_class) @@ -1056,23 +1055,23 @@ module SiSU_DAL digest_all=hash_class.hexdigest(para) # print "#{hash_class.name}: "; puts digest_all #length==32 or 64 stripped=strip_clean_of_markup(para) digest_strip=hash_class.hexdigest(stripped) - unless para =~/<:code>/ + unless para =~/#{Mx[:fa_o]}code#{Mx[:fa_c]}/ case para - when /~\{[\d*+]+\s+.+?\}~|~\[[*+]\d+\s+.+?\]~/m + when /#{Mx[:en_a_o]}[\d*+]+\s+.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}[*+]\d+\s+.+?#{Mx[:en_b_c]}/m en_and_para,en_and_para_digest=[],[] - para.gsub!(/\s*(\}~|\]~)/m,' \1') #watch - para_plus_en=para.scan(/.*?~\{.+?\}~|.*?~\[.+?\]~/m) - para_tail=if para =~/(?:.*?~\{.+?\}~|.*?~\[.+?\]~)+([\s\S]+)/m - /(?:.*?~\{.+?\}~|.*?~\[.+?\]~)+(.+?<~\d+;(?:\w|[0-6]:)\d+;\w\d+>)/m.match(para)[1] + para.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch + para_plus_en=para.scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m) + para_tail=if para =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m + /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1] else '' end para_plus_en << para_tail en_and_para_digest << endnote_digest(para_plus_en) para_new=en_and_para_digest.join(' ') - @tuned << para_new + '<' + digest_strip + ':' + digest_all + '>' unless para.nil? - else @tuned << para + '<' + digest_strip + ':' + digest_all + '>' unless para.nil? + @tuned << para_new + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless para.nil? + else @tuned << para + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless para.nil? end - else @tuned << para + '<' + digest_strip + ':' + digest_all + '>' unless para.nil? + else @tuned << para + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless para.nil? end @tuned.join end @@ -1088,7 +1087,8 @@ module SiSU_DAL data.compact! data.each do |para| para.strip! - if para=~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+>/ + if para=~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}/ \ + and para !~/#{Rx[:meta]}/ #test should not be necessary remove if @env.digest.type =~/sha256/ for hash_class in [ Digest::SHA256 ] @tuned_file << stamped(para,hash_class) @@ -1108,16 +1108,16 @@ module SiSU_DAL para_bit=[] data.each do |en_plus| para_bit <<= case en_plus - when /~\{|~\[/ - if en_plus =~/~\{.+?\}~|~\[.+?\]~/ - para_txt,en_open,en_txt,en_close=/(.*?)(~\{|~\[)(.+?)(\}~|\]~)/m.match(en_plus)[1..4] + when /#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/ + if en_plus =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/ + para_txt,en_open,en_txt,en_close=/(.*?)(#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(.+?)(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m.match(en_plus)[1..4] stripped_en=strip_clean_of_markup(en_txt) digest_en_strip=if @env.digest.type =~/sha256/ Digest::SHA256.hexdigest(stripped_en) else Digest::MD5.hexdigest(stripped_en) end - para_txt + en_open + en_txt + '<' + digest_en_strip + '>' + en_close + para_txt + en_open + en_txt + Mx[:id_o] + digest_en_strip + Mx[:id_c] + en_close else puts "Error Exception - problem encountered with:\n#{en_plus}" #arbitrary exception, tidy up end else en_plus @@ -1131,12 +1131,13 @@ module SiSU_DAL s=s.gsub(/ [ ]+/,' ') s=s.gsub(/^ [ ]+/,'') s=s.gsub(/ [ ]+$/,'') - s=s.gsub(/(<\/[bi]>')[ ]+(s )/,'\1\2') + s=s.gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2') + s=s.gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2') end def strip_clean_of_markup(s) # used for digest, define rules, make same as in db clean #consider: <\/?[ib]>|<(?:\/ )?br>|(.+?)<\/del> s=s.dup - s=s.gsub(/(?:<\/?[ib]>|<~\d+;(?:\w|[0-6]:)\d+;\w\d+>|<#@dp:#@dp>|^[1-6]~\S+|~\{\d+\s.+?\}~)/m,'') # markup and endnotes removed + s=s.gsub(/(?:<\/?[ib]>|#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}|#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}|^#{Mx[:lv_o]}[1-6]:\S+?#{Mx[:lv_c]}|#{Mx[:en_a_o]}\d+\s.+?#{Mx[:en_a_c]})/m,'') # markup and endnotes removed #% same as db clean --> s=s.gsub(/(.+?)<\/del>/,'DELETED(\1)') # deletions s=s.gsub(/(\d+)<\/sup>/,'[\1]') diff --git a/lib/sisu/v0/dal_doc_str.rb b/lib/sisu/v0/dal_doc_str.rb index ebaa8625..37e9be3c 100644 --- a/lib/sisu/v0/dal_doc_str.rb +++ b/lib/sisu/v0/dal_doc_str.rb @@ -71,14 +71,14 @@ module SiSU_document_structure @para end def structure_markup - @para=unless @para =~/[0-6]~/ + @para=unless @para =~/#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/ @para=case @para - when /^\s*#{@md.lv1}/; @para.sub!(/(?:<[:!]1!?>\s*)?(.+)/,'1~ \1') if @para !~/^1~/ - when /^\s*#{@md.lv2}/; @para.sub!(/(?:<[:!]2!?>\s*)?(.+)/,'2~ \1') if @para !~/^2~/ - when /^\s*#{@md.lv3}/; @para.sub!(/(?:<[:!]3!?>\s*)?(.+)/,'3~ \1') if @para !~/^3~/ - when /^\s*#{@md.lv4}/; @para.sub!(/(?:<[:!]4!?>\s*)?(.+)/,'4~ \1') if @para !~/^4~/ - when /^\s*#{@md.lv5}/; @para.sub!(/(?:<[:!]5!?>\s*)?(.+)/,'5~ \1') if @para !~/^5~/ - when /^\s*#{@md.lv6}/; @para.sub!(/(?:<[:!]6!?>\s*)?(.+)/,'6~ \1') if @para !~/^6~/ + when /^\s*#{@md.lv1}/; @para.sub!(/(?:<[:!]1!?>\s*)?(.+)/,"#{Mx[:lv_o]}1:#{Mx[:lv_c]} \\1") if @para !~/^#{Mx[:lv_o]}1:\S*?#{Mx[:lv_c]}/ + when /^\s*#{@md.lv2}/; @para.sub!(/(?:<[:!]2!?>\s*)?(.+)/,"#{Mx[:lv_o]}2:#{Mx[:lv_c]} \\1") if @para !~/^#{Mx[:lv_o]}2:\S*?#{Mx[:lv_c]}/ + when /^\s*#{@md.lv3}/; @para.sub!(/(?:<[:!]3!?>\s*)?(.+)/,"#{Mx[:lv_o]}3:#{Mx[:lv_c]} \\1") if @para !~/^#{Mx[:lv_o]}3:\S*?#{Mx[:lv_c]}/ + when /^\s*#{@md.lv4}/; @para.sub!(/(?:<[:!]4!?>\s*)?(.+)/,"#{Mx[:lv_o]}4:#{Mx[:lv_c]} \\1") if @para !~/^#{Mx[:lv_o]}4:\S*?#{Mx[:lv_c]}/ + when /^\s*#{@md.lv5}/; @para.sub!(/(?:<[:!]5!?>\s*)?(.+)/,"#{Mx[:lv_o]}5:#{Mx[:lv_c]} \\1") if @para !~/^#{Mx[:lv_o]}5:\S*?#{Mx[:lv_c]}/ + when /^\s*#{@md.lv6}/; @para.sub!(/(?:<[:!]6!?>\s*)?(.+)/,"#{Mx[:lv_o]}6:#{Mx[:lv_c]} \\1") if @para !~/^#{Mx[:lv_o]}6:\S*?#{Mx[:lv_c]}/ else @para end else @para @@ -91,12 +91,18 @@ module SiSU_document_structure #@para.gsub!(/^5~/,'8~') #@para.gsub!(/^4~/,'7~') @para.gsub!(/^[456]~/,'!_') - @para.gsub!(/^3~/,'6~') - @para.gsub!(/^2~/,'5~') - @para.gsub!(/^1~/,'4~') - @para.gsub!(/^:?C~/,'3~') - @para.gsub!(/^:?B~/,'2~') - @para.gsub!(/^:?A~/,'1~') + @para.gsub!(/^3~(\S+)/,"#{Mx[:lv_o]}6:\\1#{Mx[:lv_c]}") + @para.gsub!(/^3~\s+/,"#{Mx[:lv_o]}6:#{Mx[:lv_c]}") + @para.gsub!(/^2~(\S+)/,"#{Mx[:lv_o]}5:\\1#{Mx[:lv_c]}") + @para.gsub!(/^2~\s+/,"#{Mx[:lv_o]}5:#{Mx[:lv_c]}") + @para.gsub!(/^1~(\S+)/,"#{Mx[:lv_o]}4:\\1#{Mx[:lv_c]}") + @para.gsub!(/^1~\s+/,"#{Mx[:lv_o]}4:#{Mx[:lv_c]}") + @para.gsub!(/^:?C~(\S+)/,"#{Mx[:lv_o]}3:\\1#{Mx[:lv_c]}") + @para.gsub!(/^:?C~\s+/,"#{Mx[:lv_o]}3:#{Mx[:lv_c]}") + @para.gsub!(/^:?B~(\S+)/,"#{Mx[:lv_o]}2:\\1#{Mx[:lv_c]}") + @para.gsub!(/^:?B~\s+/,"#{Mx[:lv_o]}2:#{Mx[:lv_c]}") + @para.gsub!(/^:?A~(\S+)/,"#{Mx[:lv_o]}1:\\1#{Mx[:lv_c]}") + @para.gsub!(/^:?A~\s+/,"#{Mx[:lv_o]}1:#{Mx[:lv_c]}") @para=if @para =~/^@(?:level|markup):\s/ @para.gsub!(/3/,'6') @para.gsub!(/2/,'5') @@ -110,6 +116,25 @@ module SiSU_document_structure else @para end end + def structure_marks + para=if @md.markup_version.to_f < 0.38 + @para.gsub!(/^1~(\S+)/,"#{Mx[:lv_o]}1:\\1#{Mx[:lv_c]}") + @para.gsub!(/^1~\s+/,"#{Mx[:lv_o]}1:#{Mx[:lv_c]}") + @para.gsub!(/^2~(\S+)/,"#{Mx[:lv_o]}2:\\1#{Mx[:lv_c]}") + @para.gsub!(/^2~\s+/,"#{Mx[:lv_o]}2:#{Mx[:lv_c]}") + @para.gsub!(/^3~(\S+)/,"#{Mx[:lv_o]}3:\\1#{Mx[:lv_c]}") + @para.gsub!(/^3~\s+/,"#{Mx[:lv_o]}3:#{Mx[:lv_c]}") + @para.gsub!(/^4~(\S+)/,"#{Mx[:lv_o]}4:\\1#{Mx[:lv_c]}") + @para.gsub!(/^4~\s+/,"#{Mx[:lv_o]}4:#{Mx[:lv_c]}") + @para.gsub!(/^5~(\S+)/,"#{Mx[:lv_o]}5:\\1#{Mx[:lv_c]}") + @para.gsub!(/^5~\s+/,"#{Mx[:lv_o]}5:#{Mx[:lv_c]}") + @para.gsub!(/^6~(\S+)/,"#{Mx[:lv_o]}6:\\1#{Mx[:lv_c]}") + @para.gsub!(/^6~\s+/,"#{Mx[:lv_o]}6:#{Mx[:lv_c]}") + @para.gsub!(/^[789]~/,'!_') + @para + else @para + end + end end class Struct def initialize(o) @@ -147,21 +172,21 @@ module SiSU_document_structure #headings=if @md.ocn.inspect =~/skip=headings/; '^(?:[A-C]|[1-9])~\S* |' #else '' #end - regex_exclude_ocn_and_node = /#{headings}^%{1,4}\s|^@\S+?:\s|^0~|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^<:p[bn]>|^<:\#|<:- |<[:!]!4|||||<\/tr>|
|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|/i #ocn here #  added with Tune.code #¡ + regex_exclude_ocn_and_node = /#{headings}^%{1,4}\s|#{Rx[:meta]}|^@\S+?:\s|^0~|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|||||<\/tr>|
|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here #  added with Tune.code #¡ regex_exclude_ocn = /^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$/ #ocn here #  added with Tune.code #¡ data.each do |para| o={} if para =~/\w|\S|<|\(/ if para !~ regex_exclude_ocn_and_node if node_count_flag \ - or para=~/^1~/ + or para=~/^#{Mx[:lv_o]}1:/ node_count_flag=true end node+=1 if node_count_flag if para !~ regex_exclude_ocn # regex_exclude_large previously excluded unless para=~/<:#>|~#|-#/ # |^\s*\*\s*\*\s*\*\s*$ <-consider leaving un-numbered ocn+=1 - if para=~/^[1-8]~(?:\s+|\S)/ \ + if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/ \ or para =~@md.lv1 \ or para =~@md.lv2 \ or para =~@md.lv3 \ @@ -169,33 +194,33 @@ module SiSU_document_structure or para =~@md.lv5 \ or para =~@md.lv6 ocnh+=1 - if para=~/^1~(?:\s+|\S)/ \ + if para=~/^#{Mx[:lv_o]}1:(\S*?)#{Mx[:lv_c]}/ \ or para =~@md.lv1 ocnh1+=1 #heading ocn_dv,ocn_sp="1:#{ocnh1}","h#{ocnh}" - elsif para=~/^2~(?:\s+|\S)/ \ + elsif para=~/^#{Mx[:lv_o]}2:(\S*?)#{Mx[:lv_c]}/ \ or para =~@md.lv2; ocnh2+=1 ocn_dv,ocn_sp="2:#{ocnh2}","h#{ocnh}" - elsif para=~/^3~(?:\s+|\S)/ \ + elsif para=~/^#{Mx[:lv_o]}3:(\S*?)#{Mx[:lv_c]}/ \ or para =~@md.lv3; ocnh3+=1 ocn_dv,ocn_sp="3:#{ocnh3}","h#{ocnh}" - elsif para=~/^4~(?:\s+|\S)/ \ + elsif para=~/^#{Mx[:lv_o]}4:(\S*?)#{Mx[:lv_c]}/ \ or para =~@md.lv4; ocnh4+=1 ocn_dv,ocn_sp="4:#{ocnh4}","h#{ocnh}" - elsif para=~/^5~(?:\s+|\S)/ \ + elsif para=~/^#{Mx[:lv_o]}5:(\S*?)#{Mx[:lv_c]}/ \ or para =~@md.lv5; ocnh5+=1 ocn_dv,ocn_sp="5:#{ocnh5}","h#{ocnh}" - elsif para=~/^6~(?:\s+|\S)/ \ + elsif para=~/^#{Mx[:lv_o]}6:(\S*?)#{Mx[:lv_c]}/ \ or para =~@md.lv6; ocnh6+=1 ocn_dv,ocn_sp="6:#{ocnh6}","h#{ocnh}" end else ocno+=1 - if para=~//; ocnt+=1 #table + if para=~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/; ocnt+=1 #table ocn_dv,ocn_sp="o#{ocno}","t#{ocnt}" - elsif para=~/^<:code>/; ocnc+=1 #code block + elsif para=~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/; ocnc+=1 #code block ocn_dv,ocn_sp="o#{ocno}","c#{ocnc}" - elsif para=~/^<:(?:group|alt|verse)>/; ocng+=1 #group, poem + elsif para=~/^#{Mx[:gr_o]}(?:group|alt|verse)#{Mx[:gr_c]}/; ocng+=1 #group, poem ocn_dv,ocn_sp="o#{ocno}","g#{ocng}" elsif para=~/\{\S+?\.(?:png|jpg|gif)\s+/m; ocni+=1 #image ocn_dv,ocn_sp="o#{ocno}","i#{ocni}" @@ -212,7 +237,7 @@ module SiSU_document_structure # ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}" #end #para.gsub!(/<~#>|<-#>/,'') if para #get rid of need - para.gsub!(/<~#>/,'') if para + para.gsub!(/#{Mx[:fa_o]}~##{Mx[:fa_c]}/,'') if para ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}" o[:txt],o[:node],o[:ocn],o[:lv],o[:type]=para,node,0,ocn_dv,ocn_sp end @@ -225,9 +250,9 @@ module SiSU_document_structure end end end - para.gsub!(/\n\n/,"\n") if para =~/<:(?:code|verse|alt|group)>/ #newlines taken out - para.gsub!(/(<:(?:code-end)>)/,"\n\\1") if para =~/<:(?:code-end)>/ #newlines added check - if para =~//u,"\\1~#{o[:ocn]};#{o[:lv]};#{o[:type]}>") + para.gsub!(/\n\n/,"\n") if para =~/#{Mx[:gr_o]}(?:code|verse|alt|group)#{Mx[:gr_c]}/ #newlines taken out + para.gsub!(/(#{Mx[:gr_o]}(?:code-end)#{Mx[:gr_c]})/,"\n\\1") if para =~/#{Mx[:gr_o]}(?:code-end)#{Mx[:gr_c]}/ #newlines added check + if para =~/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}/u; para.gsub!(/(#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+?)#{Mx[:gr_c]}/u,"\\1#{Mx[:tc_p]}~#{o[:ocn]};#{o[:lv]};#{o[:type]}#{Mx[:gr_c]}") end @o_array << Struct.new(o).structure if o end diff --git a/lib/sisu/v0/dal_doc_str_code.rb b/lib/sisu/v0/dal_doc_str_code.rb index 9596bb69..82ada040 100644 --- a/lib/sisu/v0/dal_doc_str_code.rb +++ b/lib/sisu/v0/dal_doc_str_code.rb @@ -80,30 +80,30 @@ module SiSU_document_structure_code @@counter=0 @verse_count=0 data.each do |para| - para.gsub!(/(<:(?:code|verse|alt|group)>)\s/,'\1') #double check + para.gsub!(/(#{Mx[:gr_o]}(?:code|verse|alt|group)#{Mx[:gr_c]})\s/,'\1') #double check para.gsub!(/(?:\n\s*\n)+/m,"\n") unless @@flag['code'] - if para =~/^code\{/ + if para =~/^code\{/ and not @@flag['code'] @@flag['code']=true @@counter=1 - para.gsub!(/^code\{.*/,'<:code>') + para.gsub!(/^code\{.*/,"#{Mx[:gr_o]}code#{Mx[:gr_c]}") elsif para =~/^(?:poem)\{/ \ and not @@flag['code'] @@flag['poem']=true - para.gsub!(/^(poem)\{.*/,'<:verse>') + para.gsub!(/^(poem)\{.*/,"#{Mx[:gr_o]}verse#{Mx[:gr_c]}") elsif para =~/^(?:alt|group)\{/ \ and not @@flag['code'] #group not tested, stub 2005 @@flag['group']=true - para.gsub!(/^(alt|group)\{.*/,'<:\1>') + para.gsub!(/^(alt|group)\{.*/,"#{Mx[:gr_o]}\\1#{Mx[:gr_c]}") end if @@flag['code'] if @@flag['code'] \ and para =~/^\}code/ - para.gsub!(/^\}code.*/,'<:code-end>') + para.gsub!(/^\}code.*/,"#{Mx[:gr_o]}code-end#{Mx[:gr_c]}") @@flag['code']=false end if @@flag['code'] \ - or para =~/<:code-end>/ #and para =~/\S/ - sub_array=if para !~/<:code(?:-end)?>/; para.dup + '<:br>' + or para =~/#{Mx[:gr_o]}code-end#{Mx[:gr_c]}/ #and para =~/\S/ + sub_array=if para !~/#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}/; para.dup + "#{Mx[:br_nl]}" else para.dup end @line_mode=sub_array.scan(/.+/) @@ -117,18 +117,18 @@ module SiSU_document_structure_code and not @@flag['code'] if @@flag['poem'] \ and para =~/^\}(?:poem)/ - para.gsub!(/^\}(poem).*/,"\n<:verse-end>") + para.gsub!(/^\}(poem).*/,"\n#{Mx[:gr_o]}verse-end#{Mx[:gr_c]}") @@flag['poem']=false elsif ( @@flag['group'] \ and para =~/^\}(?:alt|group)/ ) \ and not @@flag['code'] - para.gsub!(/^\}(alt|group).*/,'<:\1-end>') + para.gsub!(/^\}(alt|group).*/,"#{Mx[:gr_o]}\\1-end#{Mx[:gr_c]}") @@flag['group']=false end if @@flag['poem'] \ or @@flag['group'] \ and para =~/\S/ \ - and para !~/<:(verse|code|alt|group)(-end)?>/ \ + and para !~/#{Mx[:gr_o]}(verse|code|alt|group)(-end)?#{Mx[:gr_c]}/ \ and not @@flag['code'] sub_array=para.dup @line_mode=sub_array.scan(/.+/) @@ -138,7 +138,7 @@ module SiSU_document_structure_code SiSU_document_structure_code::Code.new(@md,@line_mode).code_lines(type) div=if @@flag['poem'] \ and @verse_count > 0 - "\n" + '<:verse-end>' + "\n\n" + '<:verse>' + "\n" + "#{Mx[:gr_o]}verse-end#{Mx[:gr_c]}" + "\n\n" + "#{Mx[:gr_o]}verse#{Mx[:gr_c]}" else '' end @verse_count+=1 if @@flag['poem'] @@ -150,13 +150,13 @@ module SiSU_document_structure_code or @@flag['poem'] \ or @@flag['group'] @tuned_group << para if para =~/\S+/ - elsif para =~/<:(?:code|alt|group|verse)-end>/ + elsif para =~/#{Mx[:gr_o]}(?:code|alt|group|verse)-end#{Mx[:gr_c]}/ @verse_count=0 @tuned_group << para - tuned_file <<= if para =~/<:group-end>/ + tuned_file <<= if para =~/#{Mx[:gr_o]}group-end#{Mx[:gr_c]}/ @tuned_group.join("\n") else @tuned_group=@tuned_group.join - @tuned_group.gsub!(/<:br><:br>(<:code-end>)/,'\1') + @tuned_group.gsub!(/#{Mx[:br_nl]}#{Mx[:br_nl]}(#{Mx[:gr_o]}code-end#{Mx[:gr_c]})/,'\1') @tuned_group end @tuned_group=[] @@ -169,18 +169,18 @@ module SiSU_document_structure_code data=@data data.each do |line| if line =~/\S/ \ - and line !~/^code\{|^\}code|<:code.+/ + and line !~/^code\{|^\}code|#{Mx[:gr_o]}code.+/ line.gsub!(/\s\s/,'  ') - line.gsub!(/^/,'<:codeline>') if type=='code' # try sort for texpdf special case + line.gsub!(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type=='code' # try sort for texpdf special case if line =~/(?:https?|file|ftp):\/\/\S+$/ - line.gsub!(/$/,' <:br>') - else line.gsub!(/$/,'<:br>') #unless type=='code' + line.gsub!(/$/," #{Mx[:br_nl]}") + else line.gsub!(/$/,"#{Mx[:br_nl]}") #unless type=='code' end if @@flag['code']; @@counter+=1 else end elsif line =~/^\s*$/ - line.gsub!(/$/,' <:br>') + line.gsub!(/$/,"#{Mx[:br_nl]}") end end end diff --git a/lib/sisu/v0/dal_doc_str_tables.rb b/lib/sisu/v0/dal_doc_str_tables.rb index a17eb63f..5b636dfa 100644 --- a/lib/sisu/v0/dal_doc_str_tables.rb +++ b/lib/sisu/v0/dal_doc_str_tables.rb @@ -97,29 +97,29 @@ module SiSU_document_structure_tables w=widths.split(/;/) tuned_file=[] tuned_file << if para =~/\{(?:t|table)~h(?:\sc\d+;)?[\d; ]*\}/ - "" + "#{Mx[:gr_o]}Th#{Mx[:tc_p]} c#{@row[2].length}; #{widths}#{Mx[:gr_c]}" elsif para =~/\{(?:t|table)(?:\sc\d+;)?[\d; ]*\}/ - "" + "#{Mx[:gr_o]}T#{Mx[:tc_p]} c#{@row[2].length}; #{widths}#{Mx[:gr_c]}" end @row.each do |l| l << '' if l.length == (@row[1].length - 1) table=[] if l.length == @row[1].length - table << '' + table << Mx[:tc_o] #'' n= -1 l.each do |c| n +=1 #'' + c + '' table << if c =~/\A(?:\n)?\s*\Z/ - "#{TS1}#{TS1}#{w[n]}#{TS1}" + ' ' + "#{Mx[:tc_p]}#{Mx[:tc_p]}#{w[n]}#{Mx[:tc_p]}" + ' ' else - "#{TS1}#{TS1}#{w[n]}#{TS1}" + c.strip + "#{Mx[:tc_p]}#{Mx[:tc_p]}#{w[n]}#{Mx[:tc_p]}" + c.strip end end - table << '!>' #'' + table << Mx[:tc_c] tuned_file << table.join end end - tuned_file << '' #'' + tuned_file << "#{Mx[:gr_o]}TZ#{Mx[:gr_c]}" #'' tuned_file=tuned_file.compact.join("\n") end def tables @@ -141,13 +141,13 @@ module SiSU_document_structure_tables instructions=$1 @@column=instructions.split(/;\s*/) @@columns=@@column[0] - para.gsub!(/^table\{~h\s+(c\d+?;.+?)$/,"") - para.gsub!(/^table\{\s+(c\d+?;.+?)$/,"") + para.gsub!(/^table\{~h\s+(c\d+?;.+?)$/,"#{Mx[:gr_o]}Th#{Mx[:tc_p]} \\1#{Mx[:gr_c]}") + para.gsub!(/^table\{\s+(c\d+?;.+?)$/,"#{Mx[:gr_o]}T#{Mx[:tc_p]} \\1#{Mx[:gr_c]}") @@flag['table_to']=true end if @@flag['table_to'] \ and para =~/\}table/ - para.gsub!(/^\}table\s*$/,"\n") + para.gsub!(/^\}table\s*$/,"#{Mx[:gr_o]}TZ#{Mx[:gr_c]}\n") para.gsub!(/\n/,' ') #newlines taken out para.strip! @tuned_table << para @@ -164,13 +164,13 @@ module SiSU_document_structure_tables @@line_mode=sub_array.scan(/.+/u) Tables.new(@md,@@line_mode).tr_td para=@@line_mode.join - para.gsub!(/(.*\S+.*)\Z/m,'') unless para =~// + para.gsub!(/\n/,' ') unless para =~/#{Mx[:gr_o]}(?:code|verse|alt|group)#{Mx[:gr_c]}/ para.strip! @tuned_file << para end @@ -182,11 +182,11 @@ module SiSU_document_structure_tables data=@data data.each do |line| if @@counter <= @@columns.to_i \ - and line !~/(\}T\s*$|<:table[-_](close|end)>|)/ + and line !~/(\}T\s*$|#{Mx[:gr_o]}:table[-_](close|end)#{Mx[:gr_c]}|#{Mx[:gr_o]}TZ#{Mx[:gr_c]})/ #if line.encoding.inspect =~/Encoding:ASCII-8BIT/ # line=line.force_encoding('utf-8') #end - line.gsub!(/(.+)/,"#{TS1}#{TS1}#{@@column[@@counter]}#{TS1}\\1") unless line =~/|<:br>' + @http_m=%r{\{.+?\}(?:https?|file)://\S+|(?:https?|file):\S+|\.\.\/\S+|\S+?\.png\b|[*]~\S+|^#{Mx[:meta_o]}.+|#{Mx[:gr_o]}(?:code|group|alt|verse)(?:-end)?#{Mx[:gr_c]}|#{Mx[:fa_o]}:br#{Mx[:fa_c]}} @manmkp_ital='[i/]\\{.+?\\}[i/]' tail_m_ital=%q{(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$)} - tail_m_bold=%q{(?:(?:<\/i>)?(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$))?} - bold_line=%q{^!_\s.+?(?:
|\n|$)} + tail_m_bold=%{(?:(?:#{Mx[:fa_italics_c]})?(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$))?} + bold_line=%{^!_\s.+?(?:#{Mx[:br_line]}|\n|$)} @line_scan_ital=if defined? @md.make_italic[:str] \ and defined? @vz.markup_make_italic[:str] /#@http_m|#{bold_line}|#@manmkp_ital#{tail_m_ital}|(?:#{@md.make_italic[:str]}|#{@vz.markup_make_italic[:str]})#{tail_m_ital}|\S+|\n/ @@ -114,6 +114,8 @@ module Syntax end def pre(line) line=line.dup + line.gsub!(/^0~(\S+)/,"#{Mx[:lv_o]}@\\1#{Mx[:lv_c]}") + line.gsub!(/^@(\S+?):/,"#{Mx[:lv_o]}@\\1#{Mx[:lv_c]}") if line =~/\{(?:t|table)(?:~h)?\s*c?[\d; ]*\}/; line.gsub!(/(\n)/,';;\1') #markup for alternative tables end line @@ -124,7 +126,7 @@ module Syntax and @md.make_italic[:str]) \ or (defined? @vz.markup_make_italic[:str] \ and @vz.markup_make_italic[:str]) - line= if line !~/^(?:0~|%{1,4}\s|<:code)/ #!~/^(?:[0-6]~|!_|%+\s)/ + line= if line !~/^(?:#{Rx[:meta]}|#{Mx[:gr_o]}code)/ #!~/^(?:[0-6]~|!_|%+\s)/ word=line.scan(@line_scan_ital) word.flatten! word.compact! #reinstated @@ -133,10 +135,10 @@ module Syntax unless /#@manmkp_ital|#@http_m/.match(w) if defined? @md.make_italic[:regx] \ and @md.make_italic[:regx] - w.gsub!(@md.make_italic[:regx],'\1') + w.gsub!(@md.make_italic[:regx],"#{Mx[:fa_italics_o]}\\1#{Mx[:fa_italics_c]}") elsif defined? @vz.markup_make_italic \ and @vz.markup_make_italic - w.gsub!(@vz.markup_make_italic,'\1') + w.gsub!(@vz.markup_make_italic,"#{Mx[:fa_italics_o]}\\1#{Mx[:fa_italics_c]}") end end line_array << w @@ -148,10 +150,10 @@ module Syntax line end def embolden(given) - given.gsub!(/(?:^!_|^[7-9]~)\s+(.+?)(
)/,'\1\2') - given.gsub!(/(?:^!_|^[7-9]~)\s+(.+?)\s+((?:[*]~\S+\s*)+)/,'\1 \2') - given.gsub!(/(?:^!_|^[7-9]~)\s+(.+?)\s*([~-]#)$/,'\1 \2') - given.gsub!(/(?:^!_\s+|^[7-9]~\s+)(.*)?\s*$/,'\1') + given.gsub!(/(?:^!_|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]})\s*(.+?)(
)/,"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2") + given.gsub!(/(?:^!_|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]})\s*(.+?)\s+((?:[*]~\S+\s*)+)/,"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2") + given.gsub!(/(?:^!_|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]})\s*(.+?)\s*([~-]#)$/,"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2") + given.gsub!(/(?:^!_\s+|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]}\s*)(.*)?\s*$/,"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}") end def wordlist_bold(line) line=line.dup @@ -159,7 +161,7 @@ module Syntax and @md.make_bold[:str]) \ or (defined? @vz.markup_make_bold[:str] \ and @vz.markup_make_bold[:str]) - line=if line !~/^(?:[0-9]~|%+\s|<:code)/ + line=if line !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}|%+\s|#{Mx[:gr_o]}code)/ line_array=[] word=line.scan(@line_scan_bold) word.flatten! @@ -168,13 +170,13 @@ module Syntax unless /#@manmkp_bold|#@http_m/.match(w) if defined? @md.make_bold[:regx] \ and @md.make_bold[:regx] #document header: 0~bold [bold word list] - w.gsub!(@md.make_bold[:regx],'\1') + w.gsub!(@md.make_bold[:regx],"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}") elsif defined? @vz.markup_make_bold \ and @vz.markup_make_bold #defaults and skin adjusted bold word list - w.gsub!(@vz.markup_make_bold,'\1') + w.gsub!(@vz.markup_make_bold,"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}") end else - if w =~ /(?:^!_|^[7-9]~)\s+/; embolden(w) #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost! + if w =~ /(?:^!_|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]})\s+/; embolden(w) #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost! end end line_array << w @@ -183,8 +185,8 @@ module Syntax else line end else - if line !~/^(?:[0-9]~|%+\s)/ \ - and line =~ /(?:^!_|^[7-9]~)\s+/ + if line !~/^(?:#{Mx[:lv_o]}[0-9]:\S*?#{Mx[:lv_c]}|%+\s)/ \ + and line =~ /(?:^!_|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]})\s+/ embolden(line) end end @@ -218,107 +220,126 @@ module Syntax # # #numbered (list) level 1 # _# #numbered (list) level 2 line=line.dup - if line !~/^0~|<:codeline>|<:code-end>/ + if line !~/^#{Mx[:meta_o]}|#{Mx[:gr_o]}codeline#{Mx[:gr_c]}|#{Mx[:gr_o]}code-end#{Mx[:gr_c]}/ #special characters: ~ { } < > - _ / also used : ^ ! # line_array=[] line.gsub!(/^%{1,4} .+/mi,'') #remove comments - word=line.scan(/\S+|\n/) unless line =~/^(?:0~\S|%+\s)/ + word=line.scan(/\S+|\n/) unless line =~/^(?:#{Mx[:meta_o]}|%+\s)/ #visit if word word.each do |w| # _ - / # | : ! ^ ~ - unless w =~/^[0-9]~|~\{|\}~|~\[|\]~|^\^~|~\^|\*~\S+|~#|\{t?~|\{table/ - w.gsub!(/\\~/,'~') #escaped special character - w.gsub!(/~/,'~') + unless w =~/^#{Mx[:lv_o]}[0-9]:\S*?#{Mx[:lv_c]}|~\{|\}~|~\[|\]~|^\^~|~\^|\*~\S+|~#|\{t?~|\{table/ + w.gsub!(/\\?~/,"#{Mx[:gl_o]}#126#{Mx[:gl_c]}") #escaped special character + #w.gsub!(/~/,"#{Mx[:gl_o]}#126#{Mx[:gl_c]}") end - w.gsub!(/^\<$/,'<') #escaped special character - w.gsub!(/^\>$/,'>') #escaped special character + w.gsub!(/^\<$/,"#{Mx[:gl_o]}#lt#{Mx[:gl_c]}") #escaped special character + w.gsub!(/^\>$/,"#{Mx[:gl_o]}#gt#{Mx[:gl_c]}") #escaped special character line_array << w end line=line_array.join(' ') line=line.strip end - line.gsub!(/^(1~\??) @title\s+(?:(by\s+)?(?:@creator|@author))\s*$/,"\\1 #{@md.title} - #{@md.subtitle},
\\2#{@md.dc_creator}") + line.gsub!(/~\{(.+?)\}~/m,"#{Mx[:en_a_o]}\\1#{Mx[:en_a_c]}") + line.gsub!(/~\[(.+?)\]~/m,"#{Mx[:en_b_o]}\\1#{Mx[:en_b_c]}") + line.gsub!(/^(#{Mx[:lv_o]}1:\??#{Mx[:lv_c]})\s*@title\s+(?:(by\s+)?(?:@creator|@author))\s*$/,"\\1 #{@md.title} - #{@md.subtitle},
\\2#{@md.dc_creator}") line.gsub!(/^(1~\??) @title\s*$/,"\\1 #{@md.title} - #{@md.subtitle}") # - line.gsub!(/^([23]~\??) (?:(by\s+)?(?:@creator|@author))\s*$/,"\\1 \\2#{@md.dc_creator}") # - line.gsub!(/<((?:https?|file):\/\/\S+?)>/,'< \1 >') #catch problem markup - line.gsub!(/\}\.\.\/(\S+)/,"\}#@output_url/\\1") #means you are not supporting relative links (only relevant in html), converted to static here + line.gsub!(/^(#{Mx[:lv_o]}[23]:\??#{Mx[:lv_c]}) (?:(by\s+)?(?:@creator|@author))\s*$/,"\\1 \\2#{@md.dc_creator}") # + line.gsub!(/<((?:https?|file):\/\/\S+?)>/,'< \1 >') #catch problem markup + line.gsub!(/\}\.\.\/(\S+)/,"\}#@output_url/\\1") #means you are not supporting relative links (only relevant in html), converted to static here line.gsub!(/<:=(\S+?)>/,'{ c_\1.png 14x14 }http://www.jus.uio.no/sisu') #adjustment 2005w30 line.gsub!(//,'<:\1>') #escaped special character - line.gsub!(/\\~/,'~') #escaped special character - line.gsub!(/\\\{/,'{') #escaped special character - line.gsub!(/\\\}/,'}') #escaped special character - line.gsub!(/\\\<>/,'>>') #escaped special character - line.gsub!(/\\\/,'>') #escaped special character - line.gsub!(/\\\_/,'_') #escaped special character - line.gsub!(/\\\-/,'-') #escaped special character - line.gsub!(/\\\+/,'+') #escaped special character - line.gsub!(/\\\//,'/') #escaped special character - line.gsub!(/\\\#/,'#') #escaped special character - line.gsub!(/\\\&/,'&') #& #escaped special character - line.gsub!(/\\\|/,'|') #not really a sisu special character but made available as possibility - line.gsub!(/\\\:/,':') #not really a sisu special character but made available as possibility - line.gsub!(/\\\!/,'!') #not really a sisu special character but made available as possibility - line.gsub!(/\\\^/,'^') #not really a sisu special character but made available as possibility - line.gsub!(/\\\,/,',') #not really a sisu special character but made available as possibility + line.gsub!(/\\~/,"#{Mx[:gl_o]}#126#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\{/,"#{Mx[:gl_o]}#123#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\}/,"#{Mx[:gl_o]}#125#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\<>/,"#{Mx[:gl_o]}#gt#{Mx[:gl_c]}#{Mx[:gl_o]}#gt#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\/,"#{Mx[:gl_o]}#gt#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\_/,"#{Mx[:gl_o]}#095#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\-/,"#{Mx[:gl_o]}#045#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\+/,"#{Mx[:gl_o]}#043#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\//,"#{Mx[:gl_o]}#047#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\#/,"#{Mx[:gl_o]}#035#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\&/,"#{Mx[:gl_o]}#038#{Mx[:gl_c]}") #& #escaped special character + line.gsub!(/\\\|/,"#{Mx[:gl_o]}#124#{Mx[:gl_c]}") #not really a sisu special character but made available as possibility + line.gsub!(/\\\:/,"#{Mx[:gl_o]}#058#{Mx[:gl_c]}") #not really a sisu special character but made available as possibility + line.gsub!(/\\\!/,"#{Mx[:gl_o]}#033#{Mx[:gl_c]}") #not really a sisu special character but made available as possibility + line.gsub!(/\\\^/,"#{Mx[:gl_o]}#094#{Mx[:gl_c]}") #not really a sisu special character but made available as possibility + line.gsub!(/\\\,/,"#{Mx[:gl_o]}#044#{Mx[:gl_c]}") #not really a sisu special character but made available as possibility #ADD --> - line.gsub!(/\\\\/,'\') #escaped special character - line.gsub!(/\\\*/,'*') #escaped special character - line.gsub!(/\\\!/,'!') #escaped special character - line.gsub!(/(?:^| )\*~([a-z0-9._-]+)/i,' <:name#\1>') #html name marker - line.gsub!(/^([56]~)(\S+)(.+)/,'\1\2 \3 <:name#\2>') #html name marker , however at present takes you to correct position within sub-toc, will nneed to clean from sub-toc leaving in main body only - line.gsub!(/(^| )\{~\^ (.+?)\s*\}((?:https?|file|ftp):\S+?)([;,.]?(?=\s[^~]|$))/,'\1{ \2 }\3\4 ~{ \3 }~ ') #text url endnote url shortcut {~^ [text] }http://url is { [text] }http://url ~{ http://url }~ [plus adjustment for commas] #means for this class, non-object, un-numbered ~# will not work # shortcut should not be used in conjunction with rebgular matches #reversed order, and addition of no-tilde.. - line.gsub!(/(^| )\{~\^ (.+?)\s*\}((?:https?|file|ftp):\S+)\s+~\{(.+?)\}~/,'\1{ \2 }\3 ~{ \3 \4 }~') # watch - line.gsub!(/<:?br>/,'
') #xml requires - # depreciated --> - line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)e\{(.+?)\}e/,'\1\2') #emphasis - line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)b\{(.+?)\}b/,'\1\2') #bold - line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)u\{(.+?)\}u/,'\1\2') #underscore - line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)c\{(.+?)\}c/,'\1\2') #cite /blockquote? - line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)i\{(.+?)\}i/,'\1\2') #italics + line.gsub!(/\\\\/,"#{Mx[:gl_o]}#092#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\*/,"#{Mx[:gl_o]}#042#{Mx[:gl_c]}") #escaped special character + line.gsub!(/\\\!/,"#{Mx[:gl_o]}#033#{Mx[:gl_c]}") #escaped special character + line.gsub!(/(?:^| )\*~([a-z0-9._-]+)/i," #{Mx[:mk_o]}:name#\\1#{Mx[:mk_c]}") #html name marker + line.gsub!(/^(#{Mx[:lv_o]}[56]:(\S+?)#{Mx[:lv_c]})\s*(.+)/,"\\1 \\3 #{Mx[:mk_o]}:name#\\2#{Mx[:mk_c]}") #html name marker , however at present takes you to correct position within sub-toc, will nneed to clean from sub-toc leaving in main body only + line.gsub!(/(?:<:?br>|
)/,"#{Mx[:br_line]}") #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') + if line=~/(^|#{Mx[:gl_c]}| )\{~\^ (.+?)\s*\}((?:https?|file|ftp):\S+)\s*#{Mx[:en_a_o]}(.+?)#{Mx[:en_a_c]}/m + line.gsub!(/(^|#{Mx[:gl_c]}| )\{~\^ ([^}]+?)\s*\}((?:https?|file|ftp):\S+)\s*#{Mx[:en_a_o]}(.+?)#{Mx[:en_a_c]}/m,"\\1{ \\2 }\\3 #{Mx[:en_a_o]} \\3 \\4 #{Mx[:en_a_c]}") # watch + end + if line=~/(^|#{Mx[:gl_c]}| )\{~\^ (.+?)\s*\}((?:https?|file|ftp):\S+?)([;,.]?(?=\s|$))/m + line.gsub!(/(^|#{Mx[:gl_c]}| )\{~\^ (.+?)\s*\}((?:https?|file|ftp):\S+?)([;,.]?(?=\s|$))/m,"\\1{ \\2 }\\3\\4 #{Mx[:en_a_o]} \\3 #{Mx[:en_a_c]} ") + #text url endnote url shortcut {~^ [text] }http://url is { [text] }http://url #{Mx[:en_a_o]} http://url #{Mx[:en_a_c]} [plus adjustment for commas] + #means for this class, non-object, un-numbered ~# will not work # shortcut should not be used in conjunction with rebgular matches #reversed order, and addition of no-tilde.. + end + #line.gsub!(/(^| )\{~\^ (.+?)\s*\}((?:https?|file|ftp):\S+)\s+~\{(.+?)\}~/,'\1{ \2 }\3 ~{ \3 \4 }~') # watch + line.gsub!(/<:?p([nb])>/,"#{Mx[:fa_o]}p\\1#{Mx[:fa_c]}") #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') + # depreciated -->#{Mx[:fa_c]} + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[\{]|\>)e\{(.+?)\}e/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #emphasis + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[\{]|\>)b\{(.+?)\}b/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[\{]|\>)u\{(.+?)\}u/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[\{]|\>)c\{(.+?)\}c/,"\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") #cite /blockquote? + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[\{]|\>)i\{(.+?)\}i/,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics # depreciated ^ - line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)!\{(.+?)\}!/,'\1\2') #emphasis - line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)\*\{(.+?)\}\*/,'\1\2') #bold - line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)_\{(.+?)\}_/,'\1\2') #underscore - line.gsub!(/(^|\s+|['"]| |[\(\[]|\(|\>)\/\{(.+?)\}\//,'\1\2') #italics - line.gsub!(/(^|\s+|['"]| |\(|\>)\"\{(.+?)\}\"/,'\1\2') #cite /blockquote? - line.gsub!(/(^|[^\\])\^\{(.+?)\}\^/,'\1\2') #superscript - line.gsub!(/(^|\s+|['"]| |\(|\>|\S)9\{(.+?)\}9/,'\1\2') #superscript - line.gsub!(/(^|[^\\]),\{(.+?)\},/,'\1\2') #subscript - line.gsub!(/(^|\s+|['"]| |\(|\>)6\{(.+?)\}6/,'\1\2') #subscript - line.gsub!(/(^|\s+|['"]| |\(|\>)\+\{(.+?)\}\+/,'\1\2') #inserted text - line.gsub!(/(^|\s+|['"]| |\(|\>)v\{(.+?)\}v/,'\1\2') #inserted text - line.gsub!(/(^|\s+|['"]| |\(|\>)-\{(.+?)\}-/,'\1\2') #strikethrough - deleted text - line.gsub!(/(^|\s+|['"]| |\(|\>)x\{(.+?)\}x/,'\1\2') #deleted text - line.gsub!(/(^|\s+|['"]| |\(|\>)\*(\S+?)\*/,'\1\2') #bold single word, watch - line.gsub!(/(^|\s+|['"]| |\(|\>)\!(\S+?)\!/,'\1\2') #bold single word, watch - line.gsub!(/(^|\s+|['"]| |\(|\>)\/([\(\)a-zA-Z0-9']+?)\/([^a-zA-Z0-9]|[ ,.;:'"~$]|$)/,'\1\2\3') #italics single word, watch - line.gsub!(/(^|\s+|['"]| |\(|\>)_(\S+?)_([.,!'")]?(?:\s|$))/,'\1\2\3') #underscore single word, watch (made more complicated by url decoration escape tag (_url)) - line.gsub!(/(^|\s+)-([^{]\S+?)-( |$)/,'\1\2\3') #underscore single word, watch - line.gsub!(/(^|\s+|['"]| |\(|\>|\d+)\^(\S+?)\^/,'\1\2') #superscript single word, watch digit added - line.gsub!(/<[:e]\s+(.+?)!?>/,'~{ \1 }~') # not tested - line.gsub!(/^\s*_([1-9])(\*+)\s*/,'<:i\1> _* ') #bullets, shortcut - line.gsub!(/^\s*_([1-9])\s+/,'<:i\1> ') #indent - line.gsub!(/(?:
|
)\s*_[12]\s+/,'
') #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') - line.gsub!(/<:?br>/,'
') #adjustment 2004w41, from # line.gsub!(/
/,'
') + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[\{]|\>)!\{(.+?)\}!/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #emphasis + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[\{]|\>)\*\{(.+?)\}\*/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[\{]|\>)_\{(.+?)\}_/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\(|\>)\/\{(.+?)\}\//,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\"\{(.+?)\}\"/,"\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") #cite /blockquote? + line.gsub!(/(^|[^\\])\^\{(.+?)\}\^/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |\(|\>|\S)9\{(.+?)\}9/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript + line.gsub!(/(^|[^\\]),\{(.+?)\},/,"\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") #subscript + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)6\{(.+?)\}6/,"\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") #subscript + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\+\{(.+?)\}\+/,"\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") #inserted text + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)v\{(.+?)\}v/,"\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") #inserted text + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)-\{(.+?)\}-/,"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") #strikethrough - deleted text + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)x\{(.+?)\}x/,"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") #deleted text + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\*(\S+?)\*/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold single word, watch + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\!(\S+?)\!/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold single word, watch + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\/([\(\)a-zA-Z0-9']+?)\/([^a-zA-Z0-9]|[ ,.;:'"~$]|$)/,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}\\3") #italics single word, watch + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)_(\S+?)_([.,!'")]?(?:\s|$))/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}\\3") #underscore single word, watch (made more complicated by url decoration escape tag (_url)) + line.gsub!(/(^|#{Mx[:gl_c]}|\s+)-([^{]\S+?)-( |$)/,"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}\\3") #underscore single word, watch + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>|\d+)\^(\S+?)\^/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript single word, watch digit added + line.gsub!(/<[:e]\s+(.+?)!?>/,"#{Mx[:en_a_o]} \\1 #{Mx[:en_a_c]}") #not tested + line.gsub!(/^\s*_\*\s*/,"#{Mx[:gl_bullet]}") #bullets, shortcut + #line.gsub!(/^\s*_(\*+)\s*/,"#{Mx[:gl_bullet]}") #bullets, shortcut + line.gsub!(/^\s*_([1-9])\*\s*/,"#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}#{Mx[:gl_bullet]}") #bullets, shortcut + #line.gsub!(/^\s*_([1-9])(\*+)\s*/,"#{Mx[:fa_o]}:i\\1#{Mx[:fa_c]}#{Mx[:fa_o]}\\2#{Mx[:fa_c_o]}") #bullets, shortcut + line.gsub!(/^\s*_([1-9])\s+/,"#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}") #indent + line.gsub!(/(?:<:?br>|
)/,"#{Mx[:br_line]}") #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') ##added #line.gsub!(/(?:^!_\s+|^[7-9]~\s+|<:b>)(.*)?([~-]#)$/i,'\1 \2') #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost! - #line.gsub!(/(?:^!_\s+|^[7-9]~\s+|<:b>)(.*)?\s*$/i,'\1') #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost! - #line.gsub!(/(?:(?:^| )!_ |^[7-9]~ |<:b>)(.*)\n/mi,'\1 ') #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost! + #line.gsub!(/(?:^!_\s+|^[7-9]~\s+|<:b>)(.*)?\s*$/i,'\1') #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost! + #line.gsub!(/(?:(?:^| )!_ |^[7-9]~ |<:b>)(.*)\n/mi,'\1 ') #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost! #line.gsub!(/^_" (.*)\n/i,'
\1
') #blockquotes #introduce KEEP - line.gsub!(/<:hi>/,'') # bright yellow rgb(255,255,0) pale yellow rgb(255,255,200) - line.gsub!(/<:\/hi>/,'') - line.gsub!(/(<:verse>.+)/m,"\\1\n") + line.gsub!(/<:hi>/,"#{Mx[:fa_hilite_o]}") #'') # bright yellow rgb(255,255,0) pale yellow rgb(255,255,200) + line.gsub!(/<:\/hi>/,"#{Mx[:fa_hilite_c]}") #'') + #line.gsub!(/<:hi>/, + #line.gsub!(/<:\/hi>/,'') + line.gsub!(/(#{Mx[:gr_o]}verse#{Mx[:gr_c]}.+)/m,"\\1\n") line.gsub!(/[ ]+($)/,'\1') - if line =~/(<:(?:verse|group)>)/; line.gsub!(/(<:(?:verse|group)>)/i,"\\1\n") #cosmetic - else line.gsub!(/(
)/i,"\\1\n") + #line.gsub!(/\{(.+?)\}(https?:\S+)/,"#{Mx[:lnk_o]}\\1#{Mx[:lnk_c]}\\2") #linked (text or image) + #line.gsub!(/\{(.+?)\}(image)/,"#{Mx[:lnk_o]}\\1#{Mx[:lnk_c]}\\2") #linked image + ##semantic + #line.gsub!(/([a-z](?:[a-z_:.]+?[a-z])?)+(?::\{(.+?)\}:\1)/m,"\\1#{Mx[:sm_set_o]}\\2#{Mx[:sm_set_c]}\\1") + #line.gsub!(/;\{\s*(.+?)\s*\};([a-z]+(?:[_:.][a-z]+)*)/,"#{Mx[:sm_subset_o]}\\1#{Mx[:sm_subset_c]}\\2") + if line =~/(#{Mx[:gr_o]}(?:verse|group)#{Mx[:gr_c]})/; line.gsub!(/(#{Mx[:gr_o]}(?:verse|group)#{Mx[:gr_c]})/i,"\\1\n") #cosmetic + else line.gsub!(/
/i,"#{Mx[:br_line]}\n") end - elsif line =~/^<:code(?:-end)?>|<:codeline>/ # /^<:code>/ #should be enough # underscore used as escape for angle brackets + elsif line =~/^#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}|#{Mx[:gr_o]}codeline#{Mx[:gr_c]}/ # /^<:code>/ #should be enough # underscore used as escape for angle brackets line.gsub!(/([<>])/,'_\1') line.gsub!(/_<:(\S+?)_>/,'<:\1>') #convert <:\S+> back, clumsy line.gsub!(/_<(br(?: \/)?)_>/,'<\1>') #convert

back, clumsy - line.gsub!(/(^|\s)<(br(?: \/)?)>([\s,.]|$)/,'\1<\2>\3') #convert

back, clumsy - line.gsub!(/<:codeline>/,"\n  ") #temporary fix, prefer: #line.gsub!(/<:codeline>/,"\n") + line.gsub!(/(^|#{Mx[:gl_c]}|\s)<(br(?: \/)?)>([\s,.]|$)/,'\1<\2>\3') #convert

back, clumsy + line.gsub!(/#{Mx[:gr_o]}codeline#{Mx[:gr_c]}/,"\n  ") #temporary fix, prefer: #line.gsub!(/<:codeline>/,"\n") else # 0~ end line @@ -341,32 +362,32 @@ module Syntax # +2 puts 'tech' @data.each do |line| - line.gsub!(/(^|\s+|['"]|[\(\[]|\>)e\{(.+?)\}e/,'\1\2') #emphasis - line.gsub!(/(^|\s+|['"]|[\(\[]|\>)b\{(.+?)\}b/,'\1\2') #bold - line.gsub!(/(^|\s+|['"]|[\(\[]|\>)u\{(.+?)\}u/,'\1\2') #underscore - line.gsub!(/(^|\s+|['"]|[\(\[]|\>)c\{(.+?)\}c/,'\1\2') #cite - line.gsub!(/(^|\s+|['"]|[\(\[]|\>)i\{(.+?)\}i/,'\1\2') #italics - line.gsub!(/(^|\s+|['"]|[\(\[]|\>)!\{(.+?)\}!/,'\1\2') #emphasis - line.gsub!(/(^|\s+|['"]|[\(\[]|\>)\*\{(.+?)\}\*/,'\1\2') #bold - line.gsub!(/(^|\s+|['"]|[\(\[]|\>)_\{(.+?)\}_/,'\1\2') #underscore - line.gsub!(/(^|\s+|['"]|[\(\[]|\(|\>)\/\{(.+?)\}\//,'\1\2') #italics - line.gsub!(/(^|\s+|['"]|\(|\>)\"\{(.+?)\}\"/,'\1\2') - line.gsub!(/(^|\s+|['"]|\(|\>|\S)\^\{(.+?)\}\^/,'\1\2') - line.gsub!(/(^|\s+|['"]|\(|\>|\S)9\{(.+?)\}9/,'\1\2') - line.gsub!(/(^|\s+|['"]|\(|\>),\{(.+?)\},/,'\1\2') - line.gsub!(/(^|\s+|['"]|\(|\>)6\{(.+?)\}6/,'\1\2') - line.gsub!(/(^|\s+|['"]|\(|\>)\+\{(.+?)\}\+/,'\1\2') - line.gsub!(/(^|\s+|['"]|\(|\>)v\{(.+?)\}v/,'\1\2') - line.gsub!(/(^|\s+|['"]|\(|\>)-\{(.+?)\}-/,'\1\2') - line.gsub!(/(^|\s+|['"]|\(|\>)x\{(.+?)\}x/,'\1\2') - line.gsub!(/(^|\s+|['"]|\(|\>)\*(\S+?)\*/,'\1\2') #bold single word, watch - line.gsub!(/(^|\s+|['"]|\(|\>)\!(\S+?)\!/,'\1\2') #bold single word, watch - line.gsub!(/(^|\s+|['"]|\(|\>)\/([\(\)a-zA-Z0-9']+?)\/([ ,.;:'"~$]|[^a-zA-Z0-9])/,'\1\2\3') #italics single word, watch - line.gsub!(/(^|\s+|['"]|\(|\>)_(\S+?)_/,'\1\2') #underscore single word, watch - line.gsub!(/(^|\s+|['"]|\(|\>|\d+)\^(\S+?)\^/,'\1\2') #superscript single word, watch digit added - line.gsub!(/^\s*_([1-9])(\*+)\s*/,'<:i\1> _* ') # bullets, shortcut - line.gsub!(/^\s*_([1-9])\s+/,'<:i\1> ') - line.gsub!(/<:?br>/,'
') + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)e\{(.+?)\}e/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #emphasis + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)b\{(.+?)\}b/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)u\{(.+?)\}u/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)c\{(.+?)\}c/,"\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") #cite + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)i\{(.+?)\}i/,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)!\{(.+?)\}!/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #emphasis + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)\*\{(.+?)\}\*/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)_\{(.+?)\}_/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\(|\>)\/\{(.+?)\}\//,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\"\{(.+?)\}\"/,"\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\^\{(.+?)\}\^/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)9\{(.+?)\}9/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>),\{(.+?)\},/,"\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)6\{(.+?)\}6/,"\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\+\{(.+?)\}\+/,"\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)v\{(.+?)\}v/,"\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)-\{(.+?)\}-/,"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)x\{(.+?)\}x/,"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\*(\S+?)\*/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold single word, watch + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\!(\S+?)\!/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold single word, watch + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\/([\(\)a-zA-Z0-9']+?)\/([ ,.;:'"~$]|[^a-zA-Z0-9])/,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}\\3") #italics single word, watch + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)_(\S+?)_/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore single word, watch + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\^(\S+?)\^/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #check #superscript single word, watch digit added + line.gsub!(/^\s*_\([1-9]\)\(\*\+\)\s*/,"#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}#{Mx[:fa_o]}\\2#{Mx[:fa_c_o]}") # bullets, shortcut + line.gsub!(/^\s*_\([1-9]\)\s+/,"#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}") + line.gsub!(/(?:<:?br>|
)\s*_[12]\s+/,"#{Mx[:br_line]} ") #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') end @data end diff --git a/lib/sisu/v0/db_import.rb b/lib/sisu/v0/db_import.rb index cb68ffd6..1e788f8e 100644 --- a/lib/sisu/v0/db_import.rb +++ b/lib/sisu/v0/db_import.rb @@ -138,20 +138,20 @@ module SiSU_DB_import end def special_character_escape(string) string.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") - string.gsub!(/<:br>/,"
\n") - string.gsub!(/<:(?:code|alt|group|verse)(?:-end)?>/,'') - string.gsub!(/<:name#\S+?>/,'') + string.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"
\n") + string.gsub!(/#{Mx[:gr_o]}(?:code|alt|group|verse)(?:-end)?#{Mx[:gr_c]}/,'') + string.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') string.gsub!(/\{\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)\}\S+/,'[image: \1] \2') string.gsub!(/\{\s*(.+?)\s*\}(?:https?|file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') end def strip_markup(string) #define rules, make same as in dal clean - string.gsub!(/(\d+)<\/sup>/,'[\1]') - string.gsub!(/<:i[12]>/,'') + string.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') + string.gsub!(/#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/,'') string.gsub!(/(?: \\;)+/,' ') - string.gsub!(//u,"[TABLE]\n") #tables - string.gsub!(//u,'\1') #tables - string.gsub!(/¡¡\d+¡/u,' ') #tables - string.gsub!(/¡/u,' ') #tables tidy later + string.gsub!(/#{Mx[:gr_o]}T[h]?#{Mx[:tc_p]}.+?#{Mx[:gr_c]}/u,"[TABLE]\n") #tables #CHECK should take whole table + string.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables + string.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables + string.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later string.gsub!(/<.+?>/,'') string.gsub!(/\{.+?\.(?:png|jpg|gif).+?\}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search string.gsub!(/\s\s+/,' ') @@ -375,23 +375,30 @@ module SiSU_DB_import @col[:en_z]=nil dal_array.each do |data| #data.gsub!(/<[biu]>(.+?)<\/[biu]>/,'\1') # remove bold, italics, underscore - data.gsub!(/(.+?)<\/b>/,'\1') # remove bold, italics, underscore - data.gsub!(/(.+?)<\/i>/,'\1') # remove bold, italics, underscore - data.gsub!(/(.+?)<\/u>/,'\1') # remove bold, italics, underscore - #data.gsub!(/<:name#\S+?>/,'') + data.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + #data.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphaisis_c]}/,'\1') + data.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + data.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + data.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') + data.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') + data.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') + data.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') + data.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + data.gsub!(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ') + data.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') @col[:seg]=@@seg - if data =~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m # regular text + if data =~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}#{Mx[:id_c]}/m # regular text notedata=data.dup - if data[/^([123])~\s+(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/] + if data[/^#{Mx[:lv_o]}([123]):\S*?#{Mx[:lv_c]}\s*(.+?)#{Mx[:id_o]}~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)#{Mx[:id_c]}#{Mx[:id_o]}([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})#{Mx[:id_c]}/m] @col[:lev],txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6,$7 @col[:lid]+=1 - if txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/ + if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ endnotes(txt).range - if txt =~/~\{.+?\}~/; @en << endnotes(txt).standard + if txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @en << endnotes(txt).standard end - if txt =~/~\[\*.+?\]~/; @en_ast << endnotes(txt).asterisk + if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @en_ast << endnotes(txt).asterisk end - if txt =~/~\[\+.+?\]~/; @en_pls << endnotes(txt).plus + if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/; @en_pls << endnotes(txt).plus end txt=endnotes(txt).clean_text end @@ -413,7 +420,7 @@ module SiSU_DB_import when /3/; @col[:lv3]+=1 end @col[:lev]=@col[:plaintext]=@col[:body]='' - elsif data[/^4~(.+?)\s+(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/] + elsif data[/^#{Mx[:lv_o]}4:(\S*?)#{Mx[:lv_c]}\s*(.+?)#{Mx[:id_o]}~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)#{Mx[:id_c]}#{Mx[:id_o]}([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})#{Mx[:id_c]}/] @@seg,txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6,$7 @col[:seg]=@@seg @col[:lv4]+=1 @@ -426,13 +433,13 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#@hname.html" - if txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/ + if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ endnotes(txt).range - if txt =~ /~\{.+?\}~/; @en << endnotes(txt).standard + if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @en << endnotes(txt).standard end - if txt =~/~\[\*.+?\]~/; @en_ast << endnotes(txt).asterisk + if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @en_ast << endnotes(txt).asterisk end - if txt =~/~\[\+.+?\]~/; @en_pls << endnotes(txt).plus + if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/; @en_pls << endnotes(txt).plus end txt=endnotes(txt).clean_text(@base_url) end @@ -449,9 +456,9 @@ module SiSU_DB_import t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) t.tuple @col[:lev]=@col[:plaintext]=@col[:body]='' - elsif data[/^5~(?:~\S+)?(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/] # header lev5 seg level + elsif data[/^#{Mx[:lv_o]}5:\S*?#{Mx[:lv_c]}\s*(.+?)#{Mx[:id_o]}~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)#{Mx[:id_c]}#{Mx[:id_o]}([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})#{Mx[:id_c]}/] # header lev5 seg level txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6 - re=/^5~(.+?)\s+/ + re=/^#{Mx[:lv_o]}5:(\S*?)#{Mx[:lv_c]}/ @@seg_full=re.match(data)[1] if data=~re #create? @@seg ||='' #nil # watch @col[:seg]=@@seg @@ -465,13 +472,13 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#@hname.html" - if txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/ + if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ endnotes(txt).range - if txt =~ /~\{.+?\}~/; @en << endnotes(txt).standard + if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @en << endnotes(txt).standard end - if txt =~/~\[\*.+?\]~/; @en_ast << endnotes(txt).asterisk + if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @en_ast << endnotes(txt).asterisk end - if txt =~/~\[\+.+?\]~/; @en_pls << endnotes(txt).plus + if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/; @en_pls << endnotes(txt).plus end txt=endnotes(txt).clean_text(@base_url) end @@ -488,9 +495,9 @@ module SiSU_DB_import t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) t.tuple @col[:lev]=@col[:plaintext]=@col[:body]='' - elsif data[/^6~(?:~\S+)?(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/] # header lev6 seg level + elsif data[/^#{Mx[:lv_o]}6:\S*?#{Mx[:lv_c]}\s*(.+?)#{Mx[:id_o]}~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)#{Mx[:id_c]}#{Mx[:id_o]}([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})#{Mx[:id_c]}/] # header lev6 seg level txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6 - re=/^6~(.+?)\s+/ + re=/^#{Mx[:lv_o]}6:(\S*?)#{Mx[:lv_c]}/ @@seg_full=re.match(data)[1] if data=~re #create? @@seg ||='' #nil # watch @col[:seg]=@@seg @@ -504,13 +511,13 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#@hname.html" - if txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/ + if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ endnotes(txt).range - if txt =~ /~\{.+?\}~/; @en << endnotes(txt).standard + if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @en << endnotes(txt).standard end - if txt =~/~\[\*.+?\]~/; @en_ast << endnotes(txt).asterisk + if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @en_ast << endnotes(txt).asterisk end - if txt =~/~\[\+.+?\]~/; @en_pls << endnotes(txt).plus + if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/; @en_pls << endnotes(txt).plus end txt=endnotes(txt).clean_text(@base_url) end @@ -530,21 +537,21 @@ module SiSU_DB_import else #% regular text @col[:lid]+=1 txt='' - txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=(/(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/m).match(data).captures + txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=(/(.+?)#{Mx[:id_o]}~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)#{Mx[:id_c]}#{Mx[:id_o]}([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})#{Mx[:id_c]}/m).match(data).captures @hname=if @col[:seg] \ and not @col[:seg].to_s.empty? - @@hname=@col[:seg].to_s + @@hname=@col[:seg].to_s else @@hname end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#@hname.html" - if txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/ + if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ endnotes(txt).range - if txt =~ /~\{.+?\}~/; @en << endnotes(txt).standard + if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @en << endnotes(txt).standard end - if txt =~/~\[\*.+?\]~/; @en_ast << endnotes(txt).asterisk + if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @en_ast << endnotes(txt).asterisk end - if txt =~/~\[\+.+?\]~/; @en_pls << endnotes(txt).plus + if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/; @en_pls << endnotes(txt).plus end txt=endnotes(txt).clean_text(@base_url) end @@ -562,12 +569,10 @@ module SiSU_DB_import end if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last end - @col[:body]=if txt=~// #watch + @col[:body]=if txt=~/#{Mx[:gr_o]}T[h]?#{Mx[:tc_p]}.+?#{Mx[:tc_p]}~\d+;\w\d+;\w\d+#{Mx[:gr_c]}/ #watch SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).html_table - elsif txt=~/<:i1>/ - SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).indent1 - elsif txt=~/<:i2>/ - SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).indent2 + elsif txt=~/^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/ + SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).indent($1) else SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).norm end @@ -580,11 +585,11 @@ module SiSU_DB_import @col[:en_a]=@col[:en_z]=nil @col[:lev]=@col[:plaintext]=@col[:body]='' end - if notedata =~ /~\{.+?\}~/ #% import into database endnotes tables - endnote_array=notedata.scan(/~\{.+?\}~/) + if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ #% import into database endnotes tables + endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) endnote_array.each do |inf| - if inf[/~\{\d+.+?<[0-9a-f]{#{@@dl}}>\}~/] # dal new endnotes 2003w31/1 - if inf[/~\{(\d+)(.+?)<([0-9a-f]{#{@@dl}})>\}~/] # dal new endnotes 2003w31/1 + if inf[/#{Mx[:en_a_o]}\d+.+?#{Mx[:id_o]}[0-9a-f]{#{@@dl}}#{Mx[:id_c]}#{Mx[:en_a_c]}/] # dal new endnotes 2003w31/1 + if inf[/#{Mx[:en_a_o]}(\d+)(.+?)#{Mx[:id_o]}([0-9a-f]{#{@@dl}})#{Mx[:id_c]}#{Mx[:en_a_c]}/] # dal new endnotes 2003w31/1 nr,txt,digest_clean=$1,$2,$3 end @id_n+=1 @@ -620,11 +625,11 @@ module SiSU_DB_import end word_mode=notedata.scan(/\S+/) end - if notedata =~ /~\[\*.+?\]~/ #% import into database endnotes tables - endnote_array=notedata.scan(/~\[\*.+?\]~/) + if notedata =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables + endnote_array=notedata.scan(/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) endnote_array.each do |inf| - if inf[/~\[\*\d+.+?<[0-9a-f]{#{@@dl}}>\]~/] # dal new endnotes 2003w31/1 - if inf[/~\[[*](\d+)(.+?)<([0-9a-f]{#{@@dl}})>\]~/] # dal new endnotes 2003w31/1 + if inf[/#{Mx[:en_b_o]}\*\d+.+?#{Mx[:id_o]}[0-9a-f]{#{@@dl}}#{Mx[:id_c]}#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 + if inf[/#{Mx[:en_b_o]}[*](\d+)(.+?)#{Mx[:id_o]}([0-9a-f]{#{@@dl}})#{Mx[:id_c]}#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 nr,txt,digest_clean=$1,$2,$3 end @id_n+=1 @@ -661,11 +666,11 @@ module SiSU_DB_import end word_mode=notedata.scan(/\S+/) end - if notedata =~ /~\[\+.+?\]~/ #% import into database endnotes tables - endnote_array=notedata.scan(/~\[\+.+?\]~/) + if notedata =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables + endnote_array=notedata.scan(/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) endnote_array.each do |inf| - if inf[/~\[\+\d+.+?<[0-9a-f]{#{@@dl}}>\]~/] # dal new endnotes 2003w31/1 - if inf[/~\[[+](\d+)(.+?)<([0-9a-f]{#{@@dl}})>\]~/] # dal new endnotes 2003w31/1 + if inf[/#{Mx[:en_b_o]}\+\d+.+?#{Mx[:id_o]}[0-9a-f]{#{@@dl}}#{Mx[:id_c]}#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 + if inf[/#{Mx[:en_b_o]}[+](\d+)(.+?)#{Mx[:id_o]}([0-9a-f]{#{@@dl}})#{Mx[:id_c]}#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 nr,txt,digest_clean=$1,$2,$3 end @id_n+=1 @@ -710,38 +715,38 @@ module SiSU_DB_import def endnotes(txt) @txt=txt def standard - x=if @txt =~ /~\{.+?\}~/; @txt.scan(/~\{(\d+).+?\}~/) + x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) else nil end end def asterisk - x=if @txt =~/~\[\*.+?\]~/; @txt.scan(/~\[[*](\d+).+?\]~/) + x=if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) else nil end end def plus - x=if @txt =~/~\[\+.+?\]~/; @txt.scan(/~\[[+](\d+).+?\]~/) + x=if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) else nil end end def clean_text(base_url=nil) if base_url - @txt.gsub!(/~\{(\d+).+?\}~/,%{\\1}) - @txt.gsub!(/~\[([*]\d+).+?\]~/,%{\\1}) - @txt.gsub!(/~\[([+]\d+).+?\]~/,%{\\1}) + @txt.gsub!(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,%{\\1}) + @txt.gsub!(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,%{\\1}) + @txt.gsub!(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,%{\\1}) else - @txt.gsub!(/~\{(\d+).+?\}~/,'\1') - @txt.gsub!(/~\[([*]\d+).+?\]~/,'\1') - @txt.gsub!(/~\[([+]\d+).+?\]~/,'\1') + @txt.gsub!(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,'\1') + @txt.gsub!(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,'\1') + @txt.gsub!(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,'\1') end @txt end def range @col[:en_a]=@col[:en_z]=nil - if @txt =~ /~\{.+?\}~|~\[([*]\d+).+?\]~|~\[([+]\d+).+?\]~/ + if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}|#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/ word_array=@txt.scan(/\S+/) word_array.each do |w| - if w[/~[{\[][*+]?(\d+)\s+.+?[}\]]~/] # not tested since change 2003w31 + if w[/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/] # not tested since change 2003w31 @col[:en_a]=$1 unless @col[:en_a] @col[:en_z]=@col[:en_a].dup unless @col[:en_a] @col[:en_z]=$1 if @col[:en_a] diff --git a/lib/sisu/v0/defaults.rb b/lib/sisu/v0/defaults.rb index 5ea3de65..4129893a 100644 --- a/lib/sisu/v0/defaults.rb +++ b/lib/sisu/v0/defaults.rb @@ -151,6 +151,193 @@ module SiSU_Viz end def js_old end + def semantic_tags + def default + #:f => 'firstname', + #:m => 'middlename', + #:l => 'lastname', + #:ed => 'editor', #editor? + #:v => 'version', #edition + { + :pub => 'publication', + :conv => 'convention', + :vol => 'volume', + :pg => 'page', + :cty => 'city', + :org => 'organization', + :uni => 'university', + :dept => 'department', + :fac => 'faculty', + :inst => 'institute', + :co => 'company', + :com => 'company', + :conv => 'convention', + :dt => 'date', + :y => 'year', + :m => 'month', + :d => 'day', + :ti => 'title', + :au => 'author', + :ed => 'editor', #editor? + :v => 'version', #edition + :n => 'name', + :fn => 'firstname', + :mn => 'middlename', + :ln => 'lastname', + :in => 'initials', + :qt => 'quote', + :ct => 'cite', + :ref => 'reference', + :ab => 'abreviation', + :def => 'define', + :desc => 'description', + :trans => 'translate', + } + end + self + end + #% decorate + def decorate_italics + 'title|article|book|journal' + end + def decorate_bold + end + def decorate_uppercase + 'surname' + end + #% semantic + def sem_title #dc 1 + 'title' + end + def sem_article + 'article' + end + def sem_book + 'book' + end + def sem_journal + 'journal' + end + def sem_fullname # (contains: firstname, surname) #issues arise as contains surname etc. + 'fullname' + end + def sem_first + 'first' + end + def sem_surname + 'surname' + end + def sem_middle + 'middle' + end + def sem_creator #dc 2 # == fullname (contains: firstname, surname) + 'creator' + end + def sem_author # == fullname (contains: firstname, surname) + 'author' + end + def sem_editor # == fullname (contains: firstname, surname) + 'editor' + end + def sem_illustrator # == fullname (contains: firstname, surname) + 'illustrator' + end + def sem_translator # == fullname (contains: firstname, surname) + 'translator' + end + def sem_isbn # 10 or 13 + 'isbn' + end + def sem_isbn_10 + 'isbn10' + end + def sem_isbn_13 + 'isbn13' + end + def sem_loc # library of congress + 'loc' + end + def sem_dewey + 'dewey' + end + def sem_pg # project gutenberg number + 'pg' + end + def sem_subject #dc 3 + 'subject' + end + def sem_date #dc 7 + 'date' + end + def sem_date_created + 'date_created' + end + def sem_date_issued + 'date_issued' + end + def sem_date_available + 'date_available' + end + def sem_date_valid + 'date_valid' + end + def sem_date_modified + 'date_modified' + end + def sem_type #dc 8 + 'type' + end + def sem_description #dc 4 + 'description' + end + def sem_publisher #dc 5 + 'publisher' + end + def sem_contributor #dc 6 + 'contributor' + end + def sem_format #dc 9 + 'format' + end + def sem_identifier #dc 10 + 'identifier' + end + def sem_source #dc 11 + 'source' + end + def sem_language #dc 12 + 'language' + end + def sem_relation #dc 13 + 'source' + end + def sem_coverage #dc 14 + 'coverage' + end + def sem_rights #dc 15 + 'rights' + end + def sem_copyright + 'copyright' + end + def sem_license + 'license' + end + def sem_prepared_by + 'prepared_by' + end + def sem_digitized_by + 'digitized_by' + end + def sem_keywords + 'keywords' + end + def sem_comments + 'comments' + end + def sem_abstract + 'abstract' + end #% path def path_stylesheet_home %{ } diff --git a/lib/sisu/v0/digests.rb b/lib/sisu/v0/digests.rb index 0c859010..9510a12a 100644 --- a/lib/sisu/v0/digests.rb +++ b/lib/sisu/v0/digests.rb @@ -150,11 +150,14 @@ module SiSU_Digest_view data.each do |para| x=nil y,para_endnotes=[],[] - if para =~/<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#@dl}):([0-9a-f]{#@dl})>/ + if para =~/#{Mx[:id_o]}~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)#{Mx[:id_c]}#{Mx[:id_o]}([0-9a-f]{#@dl}):([0-9a-f]{#@dl})#{Mx[:id_c]}/ ocn,h1,h2,d_clean,d_all=$1,$2,$3,$4,$5 @ocn=ocn unless ocn.to_i == 0 - if para=~/~\{[\d*+]+.+?<[0-9a-f]{#@dl}>\}~/ - para_endnotes << para.scan(/~[{\[]([\d*+]+).+?<([0-9a-f]{#@dl})>[}\]]~/) + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + if para=~/#{Mx[:en_a_o]}[\d*+]+.+?#{Mx[:id_o]}[0-9a-f]{#@dl}#{Mx[:id_c]}#{Mx[:en_a_c]}/ + para_endnotes << para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+).+?#{Mx[:id_o]}([0-9a-f]{#@dl})#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) end ima=[] if para !~/^%+\s/ \ @@ -163,21 +166,21 @@ module SiSU_Digest_view else image=nil end x=case para - when /^0~title/ + when /^#{Mx[:meta_o]}title#{Mx[:meta_c]}/ "\n" + ' '*0 +'@' + ' '*9 - when /^0~subtitle/ + when /^#{Mx[:meta_o]}subtitle#{Mx[:meta_c]}/ "\n" + ' '*1 +'@' + ' '*8 - when /^1~/ + when /^#{Mx[:lv_o]}1:/ "\n" + ' '*2 +':A ' + ' '*6 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all - when /^2~/ + when /^#{Mx[:lv_o]}2:/ "\n" + ' '*3 +':B ' + ' '*5 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all - when /^3~/ + when /^#{Mx[:lv_o]}3:/ "\n" + ' '*4 +':C ' + ' '*4 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all - when /^4~/ + when /^#{Mx[:lv_o]}4:/ "\n" + ' '*5 +'1' + ' '*4 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all - when /^5~/ + when /^#{Mx[:lv_o]}5:/ "\n" + ' '*6 +'2' + ' '*3 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all - when /^6~/ + when /^#{Mx[:lv_o]}6:/ "\n" + ' '*7 +'3' + ' '*2 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all else if para =~/MD5\(\S+?\.sst\)=\s*([0-9a-f]{#@dl})<\/u>/ #watch @@ -248,20 +251,20 @@ module SiSU_Digest_view m_ruby_version=rgx_txt(@tr.ruby_version) case para when /#{m_dc_title}: / - @t=/#{m_dc_title}: (.+?)<~\d;(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#@dl}:[0-9a-f]{#@dl}>/.match(para)[1].gsub(/<\/?u>/,'').strip + @t=/#{m_dc_title}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#@dl}:[0-9a-f]{#@dl}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip when /#{m_creator}: / - @c=/#{m_creator}: (.+?)<~\d;(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#@dl}:[0-9a-f]{#@dl}>/.match(para)[1].gsub(/<\/?u>/,'').strip + @c=/#{m_creator}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#@dl}:[0-9a-f]{#@dl}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip when /#{m_sourcefile_digest}.+?/ #watch dgst_extra="\n" + ' '*21 +'source' +' '*4 + @md.dgst[1] + ' '*34 + @md.fns when /Skin_Digest: / dgst_extra="\n" + ' '*21 + 'skin' +' '*6 + @md.dgst_skin[1] + ' '*34 + /(skin_\S+?\.rb)/.match(@md.dgst_skin[0])[1] when /#{m_sisu_version}: / - @v=/#{m_sisu_version}: (.+?)<~\d;(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#@dl}:[0-9a-f]{#@dl}>/.match(para)[1].gsub(/<\/?u>/,'').strip + @v=/#{m_sisu_version}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#@dl}:[0-9a-f]{#@dl}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip when /#{m_last_generated}: / - @g=/#{m_last_generated}: (.+?)<~\d;(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#@dl}:[0-9a-f]{#@dl}>/.match(para)[1].gsub(/<\/?u>/,'').strip + @g=/#{m_last_generated}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#@dl}:[0-9a-f]{#@dl}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip # 'doc last generated' when /#{m_ruby_version}: / - @r=/#{m_ruby_version}: (.+?)<~\d;(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#@dl}:[0-9a-f]{#@dl}>/.match(para)[1].gsub(/<\/?u>/,'').strip + @r=/#{m_ruby_version}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#@dl}:[0-9a-f]{#@dl}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip end dgst_extra ||='' "\n" + prefix +' - ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all + dgst_extra + "\n" @@ -306,27 +309,27 @@ module SiSU_Digest_view ocn,endnotes=nil,nil data.each do |para| x=case para - when /^0~/; l[0] +=1 - if para =~/^0~title/; '' #' '*0 +'@ == headers' + "\n" + ' '*0 +'headings:' + when /^#{Mx[:meta_o]}/; l[0] +=1 + if para =~/^#{Mx[:meta_o]}title#{Mx[:meta_c]}/; '' #' '*0 +'@ == headers' + "\n" + ' '*0 +'headings:' end - when /^1~/; l[1] +=1 + when /^#{Mx[:lv_o]}1:/; l[1] +=1 ' '*0 +':A' - when /^2~/; l[2] +=1 + when /^#{Mx[:lv_o]}2:/; l[2] +=1 ' '*1 +':B' - when /^3~/; l[3] +=1 + when /^#{Mx[:lv_o]}3:/; l[3] +=1 ' '*2 +':C' - when /^4~/; l[4] +=1 + when /^#{Mx[:lv_o]}4:/; l[4] +=1 ' '*3 +'1' - when /^5~/; l[5] +=1 + when /^#{Mx[:lv_o]}5:/; l[5] +=1 ' '*4 +'2' - when /^6~/; l[6] +=1 + when /^#{Mx[:lv_o]}6:/; l[6] +=1 ' '*5 +'3' else nil end - if para =~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#@dl}:[0-9a-f]{#@dl}>/ + if para =~/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#@dl}:[0-9a-f]{#@dl}#{Mx[:id_c]}/ ocn=$1 unless $1.to_i == 0 end - if para =~/~[{\[]([\d*+]+).+?<[0-9a-f]{#@dl}>[}\]]~/ + if para =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+).+?#{Mx[:id_o]}[0-9a-f]{#@dl}#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ endnotes=$1 unless $1.to_i == 0 end dal_structure_tree("#{x}\n") if x and not x.empty? diff --git a/lib/sisu/v0/help.rb b/lib/sisu/v0/help.rb index acbae57b..22736aac 100644 --- a/lib/sisu/v0/help.rb +++ b/lib/sisu/v0/help.rb @@ -1425,7 +1425,7 @@ WOK sudo ruby ./sisu-install setup if rant is installed on your system you may instead run: sudo rant base - + for further options: ./sisu-install -T diff --git a/lib/sisu/v0/html.rb b/lib/sisu/v0/html.rb index 208b7409..5b9639dd 100644 --- a/lib/sisu/v0/html.rb +++ b/lib/sisu/v0/html.rb @@ -75,6 +75,7 @@ module SiSU_HTML require "#{SiSU_lib}/html_tune" include SiSU_Tune require "#{SiSU_lib}/shared_xml" + require "#{SiSU_lib}/shared_structure" class Source def initialize(opt) @opt=opt @@ -180,52 +181,7 @@ module SiSU_HTML @tuned_file_array end end - class Split_text_object - include SiSU_Viz - include SiSU_HTML_Format_type - @@dp=nil - attr_reader :format,:text,:ocn,:scroll_lev_para_ocn,:seg_lev_para_ocn - def initialize(md,para) - @md,@para=md,para - @format,@ocn='null','null' - #@format,@ocn=nil,nil - @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - end - def lev_segname_para_ocn #needs work 2003w29 - if @para =~/^\d~.+?<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - if @para[/^([1-6])~(\S+)\s+(\S.+?)<~(\d+);(?:[ohmu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m] - @format,segname,@text,@ocn=$1,$2,$3,$4 - @format="#@format~#{segname}" # - elsif @para[/^([1-6]~)\s+(\S.+?)<~(\d+);(?:[ohmu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m] - @format,@text,@ocn=$1,$2,$3 - end - else - if @para[/^(?:<:i([1-9])>\s*_\*)\s+(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m] - @format,@text,@ocn="_#{$1}\*",$2,$3,$4 - elsif @para[/^(_\*)\s+(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m] - @format,@text,@ocn=$1,$2,$3 - elsif @para[/<:(i[1-9])>\s*(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m] - @format,@text,@ocn=$1,$2,$3 - elsif @para[/<:(code|alt|verse|group)>(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m] - @format,@text,@ocn=$1,$2,$3 - elsif @para[/(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m] - @text,@ocn=$1,$2 #,$3 - end - if @para !~/<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06 - @text=@para[/(.+?)/m,1] - end - if @para[/^(\d)~\S*\s+(.+)/m] - @format,@text=$1,$2 - end - end - @seg_lev_para_ocn=if @para[/.+<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/] - SiSU_HTML_Format_type::Format_seg.new(@md,@format,@text,@ocn) - end - @scroll_lev_para_ocn=if @para[/.+<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/] - SiSU_HTML_Format_type::Format_scroll.new(@md,@format,@text,@ocn) - end - self - end + class Split_text_object / - if pg =~/~[{\[][\d*+]+ / - endnote_array=[] - if pg=~/~\{[\d*+].+?\}\~/m - endnote_array << pg.scan(/~\{[\d*+]+(.+?)\}\~/m) + unless pg =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ + if pg =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[\d*+]+ <#@dp:#@dp>$/ + @pat_heading=/^(?:#{Mx[:lv_o]}[1-6]:\S*?#{Mx[:lv_c]}\s*)?(.*)#{Mx[:id_o]}~(\d+);(?:[hm]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ @pat_strip_heading_name=/(.+?)<\/a>/ @tell=SiSU_Screen::Ansi.new(@md.cmd) end @@ -306,22 +262,22 @@ module SiSU_HTML toc=nil @@firstseg=nil @data.each do |para| - if para =~/^([1-6]~|4~!)/ + if para =~/^(?:#{Mx[:lv_o]}[1-6]:|4~!)/ para_toc=para.dup - para_toc.gsub!(/  [\d*+]+<\/sup> <\/a>\s+~[{\[].+?[}\]]~/m,'') #remove endnotes from toc + para_toc.gsub!(/  [\d*+]+<\/sup> <\/a>\s*(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]}).+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,'') #remove endnotes from toc toc=case para_toc - when /^\s*1~(\S+)?/; Toc.new(para_toc,@md).level_1 - when /^\s*2~(\S+)?/; Toc.new(para_toc,@md).level_2 - when /^\s*3~(\S+)?/; Toc.new(para_toc,@md).level_3 - when /^\s*4~(\S+)?/; Toc.new(para_toc,@md).level_4 - when /^\s*5~(\S+)?/; Toc.new(para_toc,@md).level_5 - when /^\s*6~(\S+)?/; Toc.new(para_toc,@md).level_6 + when /^\s*#{Mx[:lv_o]}1:\S*/; Toc.new(para_toc,@md).level_1 + when /^\s*#{Mx[:lv_o]}2:\S*/; Toc.new(para_toc,@md).level_2 + when /^\s*#{Mx[:lv_o]}3:\S*/; Toc.new(para_toc,@md).level_3 + when /^\s*#{Mx[:lv_o]}4:\S+/; Toc.new(para_toc,@md).level_4 + when /^\s*#{Mx[:lv_o]}5:\S*/; Toc.new(para_toc,@md).level_5 + when /^\s*#{Mx[:lv_o]}6:\S*/; Toc.new(para_toc,@md).level_6 when /^\s*4~!/; Toc.new(para_toc).level_crosslink else end if @@firstseg.nil? \ - and para=~/^4~\S+?/ - @@firstseg=/^4~(\S+)?/.match(para)[1] + and para=~/^#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}/ + @@firstseg=/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/.match(para)[1] end if toc begin @@ -374,11 +330,11 @@ WOK end def level_1 para=@data - unless para =~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + unless para =~/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ para.gsub!(@pat_strip_heading_name,'\1') end para[@pat_heading] - linkname,link=$1,$2 if $& + linkname,link=$1.strip,$2 if $& if link \ and link !~/#/ #% keep eye on link p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,link) @@ -390,26 +346,26 @@ WOK end toc={} format_toc=SiSU_HTML_Format_type::Format_toc.new(@md,title) - toc[:seg]=if para =~/^\d~meta\s+Document Information/ + toc[:seg]=if para =~/^#{Mx[:lv_o]}\d:meta^#{Mx[:lv_c]}\s*Document Information/ format_toc.lev0 else format_toc.lev1 end - title=if para =~/(<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)$/ + title=if para =~/(#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})$/ m=/#{$1}/ para.gsub!(m,'') - if para=~/^\d~meta\s+Document Information/ + if para=~/^#{Mx[:lv_o]}\d:meta#{Mx[:lv_c]}\s*Document Information/ %{#{linkname}} else linkname end else @@toc[:scr] << '
' - link=if para =~/<~[1-9];([0-6]):\d+;\w\d+><#@dp:#@dp>$/; $1 + link=if para =~/#{Mx[:id_o]}~[1-9];([0-6]):\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/; $1 else '' end %{#{linkname}} end format_toc=SiSU_HTML_Format_type::Format_toc.new(@md,title) - toc[:scr]=if para =~/^\d~meta\s+Document Information/ + toc[:scr]=if para =~/^#{Mx[:lv_o]}\d:meta^#{Mx[:lv_c]}\s*Document Information/ format_toc.lev0 else format_toc.lev1 end @@ -417,11 +373,11 @@ WOK end def level_2 para=@data - unless para =~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + unless para =~/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ para.gsub!(@pat_strip_heading_name,'\1') end para[@pat_heading] - linkname,link=$1,$2 if $& + linkname,link=$1.strip,$2 if $& if link \ and link !~/#/ p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,link) @@ -429,7 +385,7 @@ WOK format_toc=SiSU_HTML_Format_type::Format_toc.new(@md,linkname) toc={} toc[:seg]=format_toc.lev2 - if para =~/(<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)$/ + if para =~/(#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})$/ m=/#{$1}/ para.gsub!(m,'') title=linkname @@ -444,7 +400,7 @@ WOK para.gsub!(@pat_strip_heading_name,'\1') para.gsub(/(.*?)<\/a>(.*)/,'\1') #2002w42 altered gsub! - problematic? - suspect para[@pat_heading] - linkname,link=$1,$2 if $& + linkname,link=$1.strip,$2 if $& if link \ and link !~/#/ p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,link) @@ -452,7 +408,7 @@ WOK format_toc=SiSU_HTML_Format_type::Format_toc.new(@md,linkname) toc={} toc[:seg]=format_toc.lev3 - if para =~/(<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)$/ + if para =~/(#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})$/ m=/#{$1}/ para.gsub!(m,'') title=linkname @@ -465,19 +421,19 @@ WOK def level_4 para=@data unless para =~/~metadata/ - unless para =~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + unless para =~/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ para.gsub!(@pat_strip_heading_name,'\1') para[@pat_heading] - linkname,link=$1,$2 if $& + linkname,link=$1.strip,$2 if $& p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,link) if link end - para.gsub!(/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'') - if para =~/^4~/ - seg_link=para.gsub(/^\s*4~(\S+)\s+(.+?)$/, + para.gsub!(/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') + if para =~/^#{Mx[:lv_o]}4:/ + seg_link=para.gsub(/^\s*#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}\s*(.+?)$/, %{ \\2 }) - @@seg_url=para[/^4~(\S+).+?$/,1] + @@seg_url=para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}.+?$/,1] elsif para =~/\d+.\d+.\d+.\d+|\d+.\d+.\d+|\d+.\d+|\d+/ seg_link=para.gsub(/^\s*(#{@md.lv4}\s+)\s*(\d+.\d+.\d+.\d+|\d+.\d+.\d+|\d+.\d+|\d+)(.*)/, %{<#@dp:#@dp>$/ + if para =~/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ m=/#{$1}/ para.gsub!(m,'') title=linkname @@ -500,17 +456,17 @@ WOK end def level_5 para=@data - if para !~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + if para !~/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ para.gsub!(@pat_strip_heading_name,'\1') end para[@pat_heading] - linkname,link=$1,$2 if $& + linkname,link=$1.strip,$2 if $& if link \ and link !~/#/ p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,link) end toc={} - if para =~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + if para =~/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ m=/#{$1}/ para.gsub!(m,'') title=linkname @@ -528,17 +484,17 @@ WOK end def level_6 para=@data - if para !~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + if para !~/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ para.gsub!(@pat_strip_heading_name,'\1') end para[@pat_heading] - linkname,link=$1,$2 if $& + linkname,link=$1.strip,$2 if $& if link \ and link !~/#/ p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,link) end toc={} - if para =~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + if para =~/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ m=/#{$1}/ para.gsub!(m,'') title=linkname @@ -671,7 +627,7 @@ WOK def publish scroll=[] hr='

' - scroll << @scr_toc << hr << @scr_body << hr << @scr_endnotes << hr << @scr_metadata << @scr_owner_details << @scr_tails + scroll << @scr_toc << hr << @scr_body << @scr_endnotes << hr << @scr_metadata << @scr_owner_details << @scr_tails scroll.flatten!.compact! end end @@ -690,6 +646,7 @@ WOK @data.each do |para| para.strip! para.gsub!(/<:.+?>/,'') + para.gsub!(/#{Rx[:mx_fa_clean]}/,'') unless para =~/\A\s*\Z/ @filename_html_scroll.puts para,"\n" end @@ -703,7 +660,7 @@ WOK @filename_html_index=@my_make.file_html_index(@md) @data.each do |para| para.strip! - para.gsub!(/|<~\d+;(?:[ohm]|[0-6]:)\d+;\w\d+><#@dp:#@dp>/,'') + para.gsub!(/|#{Mx[:gr_o]}.*?#{Mx[:gr_c]}|#{Mx[:id_o]}~\d+;(?:[ohm]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') unless para =~/\A\s*\Z/ @filename_html_segtoc.puts para,"\n" @filename_html_index.puts para,"\n" diff --git a/lib/sisu/v0/html_format.rb b/lib/sisu/v0/html_format.rb index 525ef50d..03d97a8d 100644 --- a/lib/sisu/v0/html_format.rb +++ b/lib/sisu/v0/html_format.rb @@ -960,7 +960,8 @@ WOK end def endnote_mark %{

-


} #revisit +
+

} #revisit end end class Format_text_object @@ -969,10 +970,10 @@ WOK attr_accessor :md,:one,:two,:three,:parablock,:table,:link,:linkname,:format,:paranum,:p_num,:para_id,:headname,:margin,:paragraph,:table,:banner,:url,:icon,:font,:one_stripped def initialize(md='',*txt) @md,@one,@two,@three=md,txt[0],txt[1],txt[2] - rgx=/^[1-6-]~/ + rgx=/^#{Mx[:lv_o]}\d:\S*?#{Mx[:lv_c]}/ @one_stripped=@one.gsub(rgx,'') if @one =~rgx @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - rgx=/~[{\[][\d*+]+\s+(.+?)<#@dp>[}\]]~/ #problem introduced + rgx=/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[\d*+]+\s+(.+?)#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ #problem introduced @one_stripped=@one.gsub(rgx,'\1') if @one =~rgx #problem introduced @link,@linkname=txt[0],txt[1] @format,parablock=txt[0],txt[1] @@ -989,7 +990,7 @@ WOK @para_id=Paragraph_id_ocn.new(@md,@paranum) #used by table version end @headname='' - if @format =~ /\d~(\S+)/ + if @format=~/^\d:(\S+)/ #need more reliable marker #if @format =~ /#{Rx[:lv]}/ headname=$1 #format[/\d~(\S+)/m,1] @headname=if headname =~/^[a-zA-Z]/; %{} #consider: h_#{headname} else %{} @@ -1045,12 +1046,12 @@ WOK end def gsub_body case @one - when /^(?:<:i[1-9]>\s*)?\((i+|iv|v|vi+|ix|x|xi+)\)/ + when /^(?:#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]}\s*)?\((i+|iv|v|vi+|ix|x|xi+)\)/ @one.gsub!(/^\((i+|iv|v|vi+|ix|x|xi+)\)/,'(\1)') - @one.gsub!(/^(<:i[1-9]>)\s*\((i+|iv|v|vi+|ix|x|xi+)\)/,'\1(\2)') - when /^(?:<:i[1-9]>\s*)?\(?(\d|[a-z])+\)/ + @one.gsub!(/^(#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]})\s*\((i+|iv|v|vi+|ix|x|xi+)\)/,'\1(\2)') + when /^(?:#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]}\s*)?\(?(\d|[a-z])+\)/ @one.gsub!(/^\((\d+|[a-z])+\)/,'(\1)') - @one.gsub!(/^(<:i[1-9]>)\s*\((\d+|[a-z])+\)/,'\1(\2)') + @one.gsub!(/^(#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]})\s*\((\d+|[a-z])+\)/,'\1(\2)') when /^\s*\d{1,3}\.\s/ @one.gsub!(/^\s*(\d+\.)/,'\1') when /^\s*[A-Z]\.\s/ @@ -1069,7 +1070,7 @@ WOK def bold_header @one.gsub!(/[1-9]~(\S+)/,'') @one.gsub!(/[1-9]~/,'') - @one.gsub!(/<~0;[um]\d+;[um]\d+><#@dp:#@dp>\s*$/i,'') #watch & do differently + @one.gsub!(/#{Mx[:id_o]}~0;[um]\d+;[um]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}\s*$/i,'') #watch & do differently %{

#@one

diff --git a/lib/sisu/v0/html_format_css.rb b/lib/sisu/v0/html_format_css.rb index db381e63..ace6983b 100644 --- a/lib/sisu/v0/html_format_css.rb +++ b/lib/sisu/v0/html_format_css.rb @@ -352,10 +352,10 @@ module SiSU_HTML_Format_type else @one end note='' - if one =~/(~[{\[].+?[}\]]~\s*)/m #this is a clumsy fix, revisit and address upstream + if one =~/((?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]}).+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})\s*)/m #this is a clumsy fix, revisit and address upstream note=$1 note.gsub!(/[\n\s]+/m,' ') - one.gsub!(/~[{\[].+?[}\]]~\s*/m,' ') + one.gsub!(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]}).+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})\s*/m,' ') one.gsub!(/ \d+<\/sup> /m,'') end %{<#@tag class="#@class"> @@ -384,7 +384,7 @@ module SiSU_HTML_Format_type #{@vz.table_close}} end def header_sub - @parablock.gsub!(/~[{\[].+?[}\]]~\s*/m,' ') + @parablock.gsub!(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]}).+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})\s*/m,' ') %{#{@vz.margin_css} #{@headname} <#@tag class="#@class" #{@p_num.id}>#{@p_num.name} diff --git a/lib/sisu/v0/html_scroll.rb b/lib/sisu/v0/html_scroll.rb index 97a1a02e..0bbac4fa 100644 --- a/lib/sisu/v0/html_scroll.rb +++ b/lib/sisu/v0/html_scroll.rb @@ -84,39 +84,39 @@ module SiSU_HTML_scroll @rcdc=false @scr={ :body=>[],:metadata=>[],:owner_details=>[] } data.each do |para| - if para =~/^\d~endnotes\s+Endnotes/ - para.gsub!(/Endnotes.+/,'') - end - if para =~/^\d~meta\s+Document Information/ + #if para =~/^#{Mx[:lv_o]}\d:endnotes#{Mx[:lv_c]}\s*Endnotes/ + ## para.gsub!(/Endnotes.+/,'') + #end + if para =~/^#{Mx[:lv_o]}\d:meta#{Mx[:lv_c]}\s*Document Information/ para.gsub!(/(Document Information(?: \(metadata\))?)/,'\1') end - if para =~/^\d~metadata\s+Metadata/ + if para =~/^#{Mx[:lv_o]}\d:metadata#{Mx[:lv_c]}\s*Metadata/ para.gsub!(/(Metadata)/,'\1') end if @rcdc==false \ - and (para =~/^\d~metadata/ or para =~/^1~meta\s+Document Information/) + and (para =~/^#{Mx[:lv_o]}:metadata#{Mx[:lv_c]}/ or para =~/^#{Mx[:lv_o]}1:meta#{Mx[:lv_c]}\s*Document Information/) @rcdc=true end - if para !~/(^0~||)/ - unless para =~/^<:code>/; para.gsub!(/~[{\[].+?[}\]]~\s+/m,' ') + if para !~/(^#{Rx[:meta]}|#{Mx[:br_endnotes]}|#{Mx[:br_eof]})/ + unless para =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/; para.gsub!(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]}).+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})\s*/m,' ') end - if para =~/.+?<~\d+;(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - paranum=para[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,1] + if para =~/.+?#{Mx[:id_o]}~\d+;(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + paranum=para[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,1] @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) end - @sto=SiSU_HTML::Source::Split_text_object.new(@md,para).lev_segname_para_ocn - m=/<~\d+;(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + @sto=SiSU_HTML::Source::Split_text_object.new(@md,para).html_scroll + m=/#{Mx[:id_o]}~\d+;(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m - format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,@sto.text) if @sto.format =~/i[1-9]|_[1-9]?\*|<:i[1-9]>\s*_\*|null/ + format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,@sto.text) if @sto.format =~/i[1-9]|null/ #watch case @sto.format - when /^1~\S*/; para=@sto.scroll_lev_para_ocn.heading_body1 - when /^2~\S*/; para=@sto.scroll_lev_para_ocn.heading_body2 - when /^3~\S*/; para=@sto.scroll_lev_para_ocn.heading_body3 - when /^4~\S+/; para=@sto.scroll_lev_para_ocn.heading_body4 # work on see Split_text_object4 - when /^5~\S*/; para=@sto.scroll_lev_para_ocn.heading_body5 - when /^6~\S*/; para=@sto.scroll_lev_para_ocn.heading_body6 - when /^_\*$/; para=@sto.scroll_lev_para_ocn.bullet - when /^_([1-9])\*$/ #indent with bullet + when /^1:\S*?/; para=@sto.scroll_lev_para_ocn.heading_body1 + when /^2:\S*?/; para=@sto.scroll_lev_para_ocn.heading_body2 + when /^3:\S*?/; para=@sto.scroll_lev_para_ocn.heading_body3 + when /^4:\S+?/; para=@sto.scroll_lev_para_ocn.heading_body4 # work on see Split_text_object4 + when /^5:\S*?/; para=@sto.scroll_lev_para_ocn.heading_body5 + when /^6:\S*?/; para=@sto.scroll_lev_para_ocn.heading_body6 + when /^#{Mx[:gl_bullet]}/; para=@sto.scroll_lev_para_ocn.bullet + when /^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}#{Mx[:gl_bullet]}/ #indent with bullet format_txt_obj.gsub_body para=@sto.scroll_lev_para_ocn.format('li',"i#{$1}") when /^i([1-9])$/ #indent @@ -130,26 +130,26 @@ module SiSU_HTML_scroll if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ format_txt_obj.gsub_body para=@sto.scroll_lev_para_ocn.para - if para =~/<#@dp:#@dp>$/ + elsif para =~/^#{Mx[:lv_o]}[1-9]:/ \ + and para !~/#{Mx[:id_o]}~\d+;(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_c]}#@dp:#@dp#{Mx[:id_c]}$/ format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,para) para=format_txt_obj.bold_header elsif para =~/Endnotes?/ \ - and para !~/<~\d+;(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~\d+;(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,'
Note') para=format_txt_obj.bold_para elsif para =~/Owner Details/ \ - and para !~/<~\d+;(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~\d+;(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,'
Owner Details') @scr[:owner_details]=format_txt_obj.bold_para para='' - elsif para =~/(.*)<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>(.*)/ #watch + elsif para =~/(.*)#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}(.*)/ #watch one,two=$1,$2 format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) para=format_seg.no_paranum @@ -162,9 +162,8 @@ module SiSU_HTML_scroll one,two=/(.*)<:center>(.*)/.match(para).captures format_scroll=SiSU_HTML_Format_type::Format_scroll.new(@md,one,two) end - para.gsub!(//,' ') - para.gsub!(/^<:\S?>/,'') - para.gsub!(/<:\S?>/,' ') + para.gsub!(/^#{Rx[:mx_fa_clean]}/,' '); para.gsub!(/^<:\S?>/,'') + para.gsub!(/#{Rx[:mx_fa_clean]}/,' '); para.gsub!(/<:\S?>/,' '); para.gsub!(//,' ') para.strip! unless @rcdc; @scr[:body] << para unless para =~/\A\s*\Z/ else @scr[:metadata] << para diff --git a/lib/sisu/v0/html_segments.rb b/lib/sisu/v0/html_segments.rb index a2d06ed9..a15c302e 100644 --- a/lib/sisu/v0/html_segments.rb +++ b/lib/sisu/v0/html_segments.rb @@ -103,9 +103,9 @@ module SiSU_HTML_seg @h_sfx=@md.sfx if @md.file_type =~/html/ @h_sfx='.html' if @md.file_type =~/html/ #used in creating file, not to be omitted. data.each do |para| - if para =~/^4~/ - @@seg_name << para[/^4~(\S+)/,1] - seg_name=para[/^4~(\S+)/,1] + if para =~/^#{Mx[:lv_o]}4:/ + @@seg_name << para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/,1] + seg_name=para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/,1] @@seg_ad[seg_name]=para[/.+?<:\d\s+(.+)\s*?>/,1] #watch end end @@ -116,36 +116,36 @@ module SiSU_HTML_seg tell.segmented unless @md.cmd =~/q/ flagend='y' data.each do |para| - if para =~/^4~.+/ #watch - if para =~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - @@header4=para.to_s[/^4~(?:\S+\s+)?(.+?)<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,1] - else @@header4=para.to_s[/^4~(?:\S+\s+)?(.+)/,1] + if para =~/^#{Mx[:lv_o]}4:/ #watch + if para =~/#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + @@header4=para.to_s[/^#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}(.+?)#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,1] + else @@header4=para.to_s[/^#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}(.+)/,1] end @@is4=newfile=1 end - if para =~/^3~.+/ - @@header3=para.to_s[/^3~(?:~\S+\s+)?(.+)/,1] + if para =~/^#{Mx[:lv_o]}3:\S*?#{Mx[:lv_c]}/ + @@header3=para.to_s[/^#{Mx[:lv_o]}3:\S*?#{Mx[:lv_c]}\s*?(.+)/,1] @@is4,@@is3=0,1 end - if para =~/^2~.+/ - @@header2=para.to_s[/^2~(?:~\S+\s+)?(.+)/,1] + if para =~/^#{Mx[:lv_o]}2:\S*?#{Mx[:lv_c]}/ + @@header2=para.to_s[/^#{Mx[:lv_o]}2:\S*?#{Mx[:lv_c]}\s*?(.+)/,1] @@is4,@@is3,@@is2=0,0,1 end - if para =~/^1~.+/ - @@header1=para.to_s[/^1~(?:~\S+\s+)?(.+)/,1] + if para =~/^#{Mx[:lv_o]}1:\S*?#{Mx[:lv_c]}/ + @@header1=para.to_s[/^#{Mx[:lv_o]}1:\S*?#{Mx[:lv_c]}\s*?(.+)/,1] @@is4,@@is3,@@is2,@@is1=0,0,0,1 end if (@@is1 && !@@is2 && !@@is3 && !@@is4) - unless para =~/^1~/; head1=$_ #; + unless para =~/^#{Mx[:lv_o]}1:/; head1=$_ #; end end if @@is4 == 1 \ - or para =~/^|^/ + or para =~/^#{Mx[:br_endnotes]}|^#{Mx[:br_eof]}/ if newfile == 1 \ - or para =~/^|^/ + or para =~/^#{Mx[:br_endnotes]}|^#{Mx[:br_eof]}/ newfile=0 - if para =~/^4~\S+/ \ - or para =~/^|^/ # @@level4 + if para =~/^#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}/ \ + or para =~/^#{Mx[:br_endnotes]}|^#{Mx[:br_eof]}/ # @@level4 if tracking != 0 mkdir_p(@md.dir_out) unless FileTest.directory?(@md.dir_out) #bug - added specifically for nav! not needed by regular seg, check !!! Seg.new('',@md).tail @@ -174,8 +174,8 @@ module SiSU_HTML_seg tracking=tracking + 1 end m=para[/.+?.*/]; @@get_hash_to=$1 if m # changed 2002w42, again w44 ! & again 2003w16 - m=para[/^4~(\S+)/]; @@get_hash_fn=$1 if m - para=if para =~//um; para.split(/\n/) + m=para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/]; @@get_hash_fn=$1 if m + para=if para =~/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s*c|#{Mx[:gr_o]}(?:code|alt|verse|group)#{Mx[:gr_c]}/um; para.split(/\n/) else para end if para.class == String @@ -195,7 +195,7 @@ module SiSU_HTML_seg end def header_art(para) format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md) - if para =~/^[0-6]~/ #2004w27/5 + if para =~/^#{Mx[:lv_o]}[1-6]:/ #2004w27/5 if @@tracker < @@seg_total-1; @@seg[:dot_nav]=format_head_seg.dot_control_pre_next else @@seg[:dot_nav]=format_head_seg.dot_control_pre end @@ -204,7 +204,7 @@ module SiSU_HTML_seg @@seg[:title]=format_head_seg.head << ads.div.major end def head(para) - clean=/|<:.*?>|<~\d+;(?:[ohum]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + clean=/|#{Mx[:gr_o]}:.*?#{Mx[:gr_c]}|<:.*?>|#{Mx[:id_o]}~\d+;(?:[ohum]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md) if @@tracker < @@seg_total-1 if @@tracker == 0; @@segtocband=format_head_seg.toc_next2 #if format_head_seg.toc_next2 @@ -218,7 +218,7 @@ module SiSU_HTML_seg @@seg[:tocband] << format_head_seg.navigation_band(@@segtocband,@@seg[:dot_nav]) @@seg[:headers] << format_head_seg.seg_head_escript if SiSU_HTML_Format_type::Head_seg.method_defined? :seg_head_escript #debug PHP move up in text #bug @@seg[:headers] << format_head_seg.title_banner(@md.title,@md.subtitle,@dc_creator).gsub(clean,'') - paranum=if @@header1[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1 + paranum=if @@header1[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/]; $1 else '' end @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) @@ -228,7 +228,7 @@ module SiSU_HTML_seg end if @@is2 == 1 header2=@@header2 - paranum=if header2[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1 + paranum=if header2[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/]; $1 else '' end @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) @@ -238,7 +238,7 @@ module SiSU_HTML_seg end if @@is3 == 1 header3=@@header3 - paranum=if header3[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1 + paranum=if header3[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/]; $1 else '' end @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) @@ -248,7 +248,7 @@ module SiSU_HTML_seg end if @@is4 == 1 header4=@@header4 - paranum=if header4[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1 + paranum=if header4[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/]; $1 else '' end @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) @@ -261,47 +261,48 @@ module SiSU_HTML_seg def markup(para) @debug=[] format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md) - if para !~/^0~/ - m=para[/.+?<~(\d+);(?:[ohm]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/] + if para !~/^#{Rx[:meta]}/ + m=para[/.+?#{Mx[:id_o]}~(\d+);(?:[ohm]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/] if m paranum=m[1].to_s @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum) end - if para =~/<:(?:code|alt|verse|group)>/m \ + if para =~/#{Mx[:gr_o]}(?:code|alt|verse|group)#{Mx[:gr_c]}/m \ or @@flag_alt==true - if para =~/<:(?:code|alt|verse|group)>/m + if para =~/#{Mx[:gr_o]}(?:code|alt|verse|group)#{Mx[:gr_c]}/m @group_collect=[] #unless @group_collect.class == Array - @group_collect << @vz.margin_txt_0 + para.gsub(/<:(?:code|alt|verse|group)-end>/m,'') #watch ! + @group_collect << @vz.margin_txt_0 + para.gsub(/#{Mx[:gr_o]}(?:code|alt|verse|group)-end#{Mx[:gr_c]}/m,'') #watch ! @@flag_alt=true elsif @@flag_alt==true - @group_collect << if para !~/<:(?:code|alt|verse|group)-end>/m # neither ideal nor necessary sort later + @group_collect << if para !~/#{Mx[:gr_o]}(?:code|alt|verse|group)-end#{Mx[:gr_c]}/m # neither ideal nor necessary sort later para else - para.gsub(/<:(?:code|alt|verse|group)-end>/m,'') + para.gsub(/#{Mx[:gr_o]}(?:code|alt|verse|group)-end#{Mx[:gr_c]}/m,'') end end - if para =~/<:(?:code|alt|verse|group)-end>/m + if para =~/#{Mx[:gr_o]}(?:code|alt|verse|group)-end#{Mx[:gr_c]}/m para=@group_collect.flatten.join @@flag_alt=false @group_collect=[] end end - if para !~/^[0-9]~/ - if para =~/(.*)<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>(.*)/ + if para !~/^#{Mx[:lv_o]}[1-9]:|#{Rx[:meta]}/ + if para =~/(.*)#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}(.*)/ one,two=$1,$2 format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) para=format_seg.no_paranum end end - if para[/<~(\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+><#@dp:#@dp)>$/] - @sto=SiSU_HTML::Source::Split_text_object.new(@md,para).lev_segname_para_ocn - format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,@sto.text) if @sto.format =~/i[1-9]|_[1-9]?\*|<:i[1-9]>\s*_\*|null/ + if para[/#{Mx[:id_o]}~(\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp)#{Mx[:id_c]}$/] + @sto=SiSU_HTML::Source::Split_text_object.new(@md,para).html_seg + format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,@sto.text) if @sto.format =~/i[1-9]|null/ #watch para=case @sto.format # work area 2003w29 ||@|def lev_segname_para_ocn| - when /^4~\S+/; @sto.seg_lev_para_ocn.header4 # work on see Split_text_object - when /^5~(?:~\S+)?/; @sto.seg_lev_para_ocn.header5 - when /^6~(?:~\S+)?/; @sto.seg_lev_para_ocn.header6 - when /^_\*$/; @sto.seg_lev_para_ocn.bullet - when /^_([1-9])\*$/ #indent levels 1-9 with bullet + when /^4:/; @sto.seg_lev_para_ocn.header4 # work on see Split_text_object + when /^5:/; @sto.seg_lev_para_ocn.header5 + when /^6:/; @sto.seg_lev_para_ocn.header6 + when /^#{Mx[:gl_bullet]}/ + @sto.seg_lev_para_ocn.bullet + when /^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}#{Mx[:gl_bullet]}/ #indent levels 1-9 with bullet format_txt_obj.gsub_body para=@sto.seg_lev_para_ocn.format('li',"i#{$1}") when /^i([1-9])$/ #indent levels 1-9 @@ -313,18 +314,18 @@ module SiSU_HTML_seg @sto.seg_lev_para_ocn.code when /null/ if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ \ - and para !~/^/ + and para !~/^#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ format_txt_obj.gsub_body @sto.seg_lev_para_ocn.para elsif para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ \ - and para =~/^/ + and para =~/^#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ format_txt_obj.gsub_body @sto.seg_lev_para_ocn.table_end else para end else para end - elsif para =~/¡|/,%{" href=\"endnotes#{@md.sfx}#_\\1">}) #endnote- twice #removed file type end if para !~/#{@vz.margin_txt_w1}|#{@vz.margin_txt_w2}/ - if para[/(.*)<~0;(?:u|[0-6]:)\d+;\w\d+><#@dp:#@dp>(.*)/] #% watch u & m? + if para[/(.*)#{Mx[:id_o]}~0;(?:u|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}(.*)/] #% watch u & m? one,two=$1,$2 format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) - para=format_seg.seg_no_paranum #% undefined +#FIX --> not that undefined, rather is not needed, should not be visited, and remove + #para=format_seg.seg_no_paranum #% undefined end para.gsub!(/\s*(-\{{2}~\d+|<:e[:_]\d+>).*/,'') #potentially dagerous - removes all paragraphs with #?? workpoint if para =~/ / #endnote- note- @@ -343,8 +345,8 @@ module SiSU_HTML_seg para=format_seg.no_paranum end end - if para =~/^4~\S+|4~!/ - para.gsub!(/4~\S+|<:[-_\w\d]?(-.+?-)?>|4~!.+/,'') #sort seg headers + if @sto.format=~/4:\S+/ + para.gsub!(/^\s*4:\S+\s*|<:[-_\w\d]?(-.+?-)?>|4~!.+/m,'') #sort seg headers @@seg[:main] << para @@seg[:main] << @@seg_subtoc[@@get_hash_fn] #% insertion of sub-toc else @@ -398,39 +400,39 @@ module SiSU_HTML_seg data.each do |para| para.gsub!(/(.+?)<\/a>/mi,'\1') if @md.flag_auto_endnotes - if para =~/^[1234]~/ \ + if para =~/^#{Mx[:lv_o]}[1234]:/ \ and not @@fn.empty? @@seg_endnotes[@@fn]=[] @@seg_endnotes[@@fn] << @@seg_endnotes_array - @@seg_endnotes_array=[] if para=~/^4~/ - @@fns_previous=@md.fns if para=~/^1~meta/ + @@seg_endnotes_array=[] if para=~/^#{Mx[:lv_o]}4:/ + @@fns_previous=@md.fns if para=~/^#{Mx[:lv_o]}1:meta#{Mx[:lv_c]}/ end - if para =~/^4~/ #% EXTRACTION OF SUB-TOCs + if para =~/^#{Mx[:lv_o]}4:/ #% EXTRACTION OF SUB-TOCs @@seg_subtoc[@@fn]=@@seg_subtoc_array @@seg_subtoc_array=[] end - if para =~/^4~/ #% SEGMENT NAME, after EXTRACTION OF ENDNOTES & SUB-TOCs - if para !~/^4~metadata/ - m=para[/^4~(\S+).+?<~(\d+);(?:[oh]|4:)\d+;\w\d+><#@dp:#@dp>$/] + if para =~/^#{Mx[:lv_o]}4:/ #% SEGMENT NAME, after EXTRACTION OF ENDNOTES & SUB-TOCs + if para !~/^#{Mx[:lv_o]}4:metadata#{Mx[:lv_c]}/ + m=para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}.+?#{Mx[:id_o]}~(\d+);(?:[oh]|4:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/] end if m; @@fn,@@to_lev4=$1,$2 if m # changed 2004w07 #endnotes and sub-tocs else - if para !~/^4~metadata/ - m=para[/^4~(\S+)/] + if para !~/^#{Mx[:lv_o]}4:metadata#{Mx[:lv_c]}/ + m=para[/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/] @@fn,@@to_lev4=$1,'nonum' if m # changed 2005w13 else @@fn='' end end end end - if para =~/^[56]~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/ + if para =~/^#{Mx[:lv_o]}[56]:\S*?#{Mx[:lv_c]}\s*(.+)?#{Mx[:id_o]}~(\d+);(?:h|[56]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ para.gsub!(/ <\/a>/,' ') case para # series changed 2002w42 - when /^5~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/ #remove [u]? req by pg texts, revist + when /^#{Mx[:lv_o]}5:\S*?#{Mx[:lv_c]}\s*(.+)?#{Mx[:id_o]}~(\d+);(?:h|[56]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #remove [u]? req by pg texts, revist one,two=$1,$2 format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) para=format_seg.subtoc_lev5 - when /^6~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/ + when /^#{Mx[:lv_o]}6:\S*?#{Mx[:lv_c]}\s*(.+)?#{Mx[:id_o]}~(\d+);(?:h|[56]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ one,two=$1,$2 format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) para=format_seg.subtoc_lev6 @@ -438,27 +440,27 @@ module SiSU_HTML_seg @@seg_subtoc_array << para end if @md.flag_auto_endnotes - if para =~/~[{\[][\d*+]+ / # endnote- + if para =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[\d*+]+ /) try.each do |e| format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,e) - note_match=if e =~/<:i[1-9]>/ + note_match=if e =~/#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]}/ format_seg.endnote_body_seg_tail_indent else format_seg.endnote_body_seg_tail end @@ -466,14 +468,14 @@ module SiSU_HTML_seg end try.join('
') #% creation of separate end segment/page of all endnotes referenced back to reference segment - m=/(?:~\{[\d*+]+|~\[[*+]\d+)\s+(.+?href=")(#-[\d*+]+".+)[}\]]~/mi + m=/(?:#{Mx[:en_a_o]}[\d*+]+|#{Mx[:en_b_o]}[*+]\d+)\s+(.+?href=")(#-[\d*+]+".+)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/mi one=note_match_seg[m,1] #note~ [a name] two=note_match_seg[m,2] #note- format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two) note_match_all_seg=format_seg.endnote_seg_body(@@fn) #BUG WATCH 200408 @@seg[:endnote_all] << note_match_all_seg end - para.gsub!(/~[{\[].+?[}\]]~\s*/m,' ') + para.gsub!(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]}).+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})\s*/m,' ') end end end diff --git a/lib/sisu/v0/html_table.rb b/lib/sisu/v0/html_table.rb index 556c46be..1aed4f5d 100644 --- a/lib/sisu/v0/html_table.rb +++ b/lib/sisu/v0/html_table.rb @@ -102,39 +102,39 @@ module SiSU_HTML_table m=@parablock[//,1] @@tablefoot << m if m @parablock.gsub!(//,'') - @@tablehead=1 if @parablock =~//u; @parablock=table_head($1) + @@tablehead=1 if @parablock =~/#{Mx[:gr_o]}Th#{Mx[:tc_p]}/u + if @parablock =~/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+?#{Mx[:tc_p]}~(\d+);\w\d+;\w\d+#{Mx[:gr_c]}/u; @parablock=table_head($1) end - if @parablock =~// + if @parablock =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ tablefoot=[] @@tablefoot.each {|x| tablefoot << ''} @@tablefoot=[] - if @parablock =~//; @parablock=table_end + if @parablock =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/; @parablock=table_end end end if @@tablehead == 1 - if @parablock =~/¡¡/u - if @parablock =~// - @parablock.gsub!(/!>/,table_row_close(true)) + if @parablock =~/#{Mx[:tc_c]}/ + @parablock.gsub!(/#{Mx[:tc_c]}/,table_row_close(true)) end @@tablehead=0 end @parablock else - if @parablock =~// - @parablock.gsub!(/!>/,table_row_close) + if @parablock =~/#{Mx[:tc_c]}/ + @parablock.gsub!(/#{Mx[:tc_c]}/,table_row_close) end @parablock end diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index 8cd8841e..11976af9 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -101,6 +101,8 @@ module SiSU_Tune @html=html end def clean + @html.gsub!(/#{Mx[:gl_o]}(#[0-9]+)#{Mx[:gl_c]}/u,'&\1;') + @html.gsub!(/#{Mx[:gl_o]}#([a-z]{2,4})#{Mx[:gl_c]}/u,'&\1;') @html.gsub!(/¢/u,'¢') # ¢ @html.gsub!(/£/u,'£') # £ @html.gsub!(/¥/u,'¥') # ¥ @@ -208,9 +210,7 @@ module SiSU_Tune tell.txt_grey unless @md.cmd =~/q/ data=Tune.new(@data,@md).endnotes_html data=Tune.new(data,@md).url_markup - if @sys.locale =~/utf-?8/i - data=Tune.new(data,@md).utf8_markup - end + data=Tune.new(data,@md).markup if @md.cmd =~/M/ #Hard Output Tune Optional on/off here data=Output.new(data,@md).hard_output Output.new(data,@md).marshal @@ -224,22 +224,30 @@ module SiSU_Tune data=@data @tuned_file=[] data.each do |para| - para.gsub!(/(\d~(\S+))/,'\1
#\2. ') + para.gsub!(/#{Mx[:lv_o]}\d:(\S?)#{Mx[:lv_c]}/,'\0#\1. ') @tuned_file << para end end - def utf8_markup + def markup @tuned_file=[] @data.each do |para| #@utf8.new(para).html #@utf8.html(@para) - if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn - #¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûü - #¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷ - ##para.gsub!(//, '&#;') - ##para.gsub!(//, '&;') - para=SiSU_Tune::Clean_html.new(para).clean - end + para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') + para.gsub!(/(?:\s*#{Mx[:br_page]}\s*|\s*#{Mx[:br_page_new]}\s*)+/m,'


') # else clean '' + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') + para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') + para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') + para.gsub!(/<(p|br)>/,'<\1 />') + para=SiSU_Tune::Clean_html.new(para).clean @tuned_file << para end end @@ -287,12 +295,10 @@ module SiSU_Tune data=@data @tuned_file=[] data.each do |para| - para.gsub!(/<:name\#(\S+?)>/,'') + #para.gsub!(/#{Mx[:mk_o]}name#(\S+?)#{Mx[:mk_c]}/,'') para.gsub!(/<-#>/,'') - para.gsub!(/<:p[bn]>/,'') - para.gsub!(/<(p|br)>/,'<\1 />') - para.gsub!(/<:br>/,'
') - unless para =~/^<:code>/ + #para.gsub!(/<(p|br)>/,'<\1 />') + unless para =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ if para =~/<::\s+/ #watch para.gsub!(/<::\s+(\S+?)\s+!>/, %{\\1}) @@ -308,11 +314,11 @@ module SiSU_Tune %{}) end if para =~/\{.+?\}((?:https?|file|ftp)\S+|image)/ - @word_mode=para.scan(/\{.+?\}(?:(?:https?|file|ftp)\S+|image)|\S+/) + @word_mode=para.scan(/\{.+?\}(?:(?:https?|file|ftp)\S+|image)|(?:#{Mx[:gl_o]}\S+?#{Mx[:gl_c]})+|\S+/u) words=urls(@word_mode) para.gsub!(/.+/m,words) end - if (para !~/^0~|^<:code>/) + if (para !~/^#{Mx[:meta_o]}|^#{Mx[:gr_o]}code#{Mx[:gr_c]}/) para.gsub!(/\\copyright/i,%{©}) if (para !~/\<:ad\s+\.\.\//) para.gsub!(/\<:ad\s+(\S+)?\s+(\S+\.png)\s+(.+)?\;\s+(.+)?\;\s*!\>/, @@ -327,11 +333,11 @@ module SiSU_Tune para.gsub!(/<:to(\d{1,7}?)>/,'to { \1 } ') if para =~/\b\S+\@\S+?\.\S+/ \ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/ - para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'<\1>\2') + para.gsub!(/([a-zA-Z0-9._-]+\@\S+?\.[a-zA-Z0-9._-]+)/,'<\1>') end para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration if para =~/..\/\S+/ \ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/ para.gsub!(/(\.\.\/\S+)/,'\1') @@ -349,14 +355,14 @@ module SiSU_Tune data=@data @tuned_file=[] data.each do |para| - unless para =~/^<:code>/ - para.gsub!(/(~[{])(\d+) (.+?) <#@dp>([}]~)/, + unless para =~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ + para.gsub!(/(#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(\d+)\s+(.+?) #{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/, '  \2  ' + #note- endnote- '\1\2  \2. \3 \4') #endnote- note- (careful may have switched) - para.gsub!(/(~\[)([*+]\d+) (.+?) <#@dp>(\]~)/, + para.gsub!(/(#{Mx[:en_b_o]})([*+]\d+)\s+(.+?) #{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_b_c]})/, '  \2  ' + #note- endnote- '\1\2  \2. \3 \4') #endnote- note- (careful may have switched) - para.gsub!(/(~\{)([*+]+) (.+?) <#@dp>(\}~)/, + para.gsub!(/(#{Mx[:en_a_o]})([*+]+)\s+(.+?) #{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_a_c]})/, '  \2  ' + #note- endnote- '\1\2  \2 \3 \4') #endnote- note- (careful may have switched) end diff --git a/lib/sisu/v0/hub.rb b/lib/sisu/v0/hub.rb index af93c736..b3815828 100644 --- a/lib/sisu/v0/hub.rb +++ b/lib/sisu/v0/hub.rb @@ -419,8 +419,8 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/ end if @opt.cmd =~/[hHz]/; op('html','html') #% -h -H -z html css end - if @opt.cmd =~/A/; op('air','air') #% #-A - end + #if @opt.cmd =~/A/; op('air','air') #% #-A + #end if @opt.cmd =~/a/; op('plaintext','plaintext') #% -a #-A -f -e -E plaintext -a creates ms-dos type; -A creates unix type, plaintext file end if @opt.cmd =~/g/; op('wikispeak','wikispeak') #% -g wiki diff --git a/lib/sisu/v0/manifest.rb b/lib/sisu/v0/manifest.rb index 4861097e..6bdfdae4 100644 --- a/lib/sisu/v0/manifest.rb +++ b/lib/sisu/v0/manifest.rb @@ -182,14 +182,64 @@ module SiSU_Manifest id,file='HTML, full length document',@md.fn[:doc] summarize(id,file,img) end - if FileTest.file?("#@base_path/#{@md.fn[:pdf_p]}")==true + #if FileTest.file?("#@base_path/#{@md.fn[:pdf_p]}")==true + # img='PDF portrait ' + # id,file="full length document (PDF portrait / vertical - recommended for printing), size #{@md.papersize_array[0]}",@md.fn[:pdf_p] + # summarize(id,file,img) + #end + #if FileTest.file?("#@base_path/#{@md.fn[:pdf_l]}")==true + # img='PDF landscape ' + # id,file="full length document (PDF landscape / horizontal - recommended for screen viewing), size #{@md.papersize_array[0]}",@md.fn[:pdf_l] + # summarize(id,file,img) + #end + if FileTest.file?("#@base_path/#{@md.fn[:pdf_p_letter]}")==true img='PDF portrait ' - id,file='PDF document portrait/vertical (recommended for printing)',@md.fn[:pdf_p] + id,file="PDF, U.S. letter size, portrait/vertical document (recommended for printing)",@md.fn[:pdf_p_letter] summarize(id,file,img) end - if FileTest.file?("#@base_path/#{@md.fn[:pdf_l]}")==true + if FileTest.file?("#@base_path/#{@md.fn[:pdf_l_letter]}")==true img='PDF landscape ' - id,file='PDF document landscape/horizontal (recommended for screen viewing)',@md.fn[:pdf_l] + id,file="PDF, U.S. letter size, landscape/horizontal document (recommended for screen viewing)",@md.fn[:pdf_l_letter] + summarize(id,file,img) + end + if FileTest.file?("#@base_path/#{@md.fn[:pdf_p_a4]}")==true + img='PDF portrait ' + id,file="PDF, A4 size, portrait/vertical document (recommended for printing)",@md.fn[:pdf_p_a4] + summarize(id,file,img) + end + if FileTest.file?("#@base_path/#{@md.fn[:pdf_l_a4]}")==true + img='PDF landscape ' + id,file="PDF, A4 size, landscape/horizontal document (recommended for screen viewing)",@md.fn[:pdf_l_a4] + summarize(id,file,img) + end + if FileTest.file?("#@base_path/#{@md.fn[:pdf_p_a5]}")==true + img='PDF portrait ' + id,file="PDF, A5 (book) size, portrait/vertical document (recommended for printing)",@md.fn[:pdf_p_a5] + summarize(id,file,img) + end + if FileTest.file?("#@base_path/#{@md.fn[:pdf_l_a5]}")==true + img='PDF landscape ' + id,file="PDF, A5 (book) size, landscape/horizontal document (recommended for screen viewing)",@md.fn[:pdf_l_a5] + summarize(id,file,img) + end + if FileTest.file?("#@base_path/#{@md.fn[:pdf_p_b5]}")==true + img='PDF portrait ' + id,file="PDF, B5 (book) size, portrait/vertical document (recommended for printing)",@md.fn[:pdf_p_b5] + summarize(id,file,img) + end + if FileTest.file?("#@base_path/#{@md.fn[:pdf_l_b5]}")==true + img='PDF landscape ' + id,file="PDF, B5 (book) size, landscape/horizontal document (recommended for screen viewing)",@md.fn[:pdf_l_b5] + summarize(id,file,img) + end + if FileTest.file?("#@base_path/#{@md.fn[:pdf_p_legal]}")==true + img='PDF portrait ' + id,file="PDF, U.S. legal size, portrait/vertical document (recommended for printing)",@md.fn[:pdf_p_legal] + summarize(id,file,img) + end + if FileTest.file?("#@base_path/#{@md.fn[:pdf_l_legal]}")==true + img='PDF landscape ' + id,file="PDF, U.S. legal size, landscape/horizontal document (recommended for screen viewing)",@md.fn[:pdf_l_legal] summarize(id,file,img) end if FileTest.file?("#@base_path/#{@md.fn[:odf]}")==true diff --git a/lib/sisu/v0/manpage.rb b/lib/sisu/v0/manpage.rb index e267de48..06878094 100644 --- a/lib/sisu/v0/manpage.rb +++ b/lib/sisu/v0/manpage.rb @@ -68,6 +68,7 @@ module SiSU_manpage require "#{SiSU_lib}/manpage_format" include Format require "#{SiSU_lib}/shared_txt" + require "#{SiSU_lib}/shared_structure" pwd=Dir.pwd @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 @@tablefoot='' @@ -100,54 +101,6 @@ module SiSU_manpage end end private - class Split_text_object ).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - if /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,$5 - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@lev,@text,@ocn=$1,$2,$3,$4 - elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,"x#{@@alt_id_count}" - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}" - end - else - if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @text,@ocn=$1,$2 - end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06 - @text=/(.+?)/m.match(@para)[1] - end - if /^((\d)~(?:~\S+)?)\s+(.+)/m.match(@para) - @format,@lev,@text=$1,$2,$3 - end - end - format=@format.dup - @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - Format::Format_text_object.new(format,@text,@ocn) - else - Format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>") - end - self - end - end class Scroll \s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m # 2004w18 pb pn removal added + @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m # 2004w18 pb pn removal added @tab="\t" @br="\n" @@dostype='unix endnotes' @@ -171,12 +124,12 @@ module SiSU_manpage end # Used for extraction of endnotes from paragraphs def extract_endnotes(para='') - notes=para.scan(/~[{\[]([\d*+]+\s+.+?)\s*<#@dp>[}\]]~/) + notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m) @n=[] notes.each do |n| #high cost to deal with
appropriately within manpage, consider n=n.dup.to_s - if n =~// - fix = n.split(//) #watch #added + if n =~/#{Mx[:br_line]}/ + fix = n.split(/\s*#{Mx[:br_line]}+\s*/) #watch #added fix.each do |x| unless x.empty?; @n << x end @@ -201,7 +154,7 @@ module SiSU_manpage GSUB ) else - wrap.gsub!(/^(.+)\Z/m, </ + if paragraph =~/#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/ m=$1.to_i - paragraph.gsub!(/<:i#{m}>/,'') + paragraph.gsub!(/#{Mx[:pa_o]}:i#{m}#{Mx[:pa_c]}/,'') util=SiSU_text_utils::Wrap.new(paragraph,78,m*2) else util=SiSU_text_utils::Wrap.new(paragraph,78,0) end @@ -277,6 +230,7 @@ WOK w.gsub!(/^(\\\.)/,' \1') w end + #wrapped.gsub!(/(?:#{Mx[:br_line]}|#{Mx[:br_nl]})\s*/m,"\n\n") if wrapped if lv times=wrapped.length times=78 if times > 78 @@ -312,106 +266,107 @@ WOK table_message='[table omitted, see other document formats]' fix=[] data.each do |para| - para.gsub!(//,'') # remove dummy headings (used by html) #check - para.gsub!(/_\*\s+/,'* ') # bullet markup, marked down - para.gsub!(/(.+?)<\/sup>/,'^\1^') - para.gsub!(/(.+?)<\/sub>/,'[\1]') - para.gsub!(/(.+?)<\/i>/,'<:br>.I \1<:br>') - para.gsub!(/\A(.+?)<\/b>
/m,'<:br>.BI \1<:br>') - para.gsub!(/(.+?)<\/b>/,'<:br>.B \1<:br>') - para.gsub!(/(.+?)<\/u>/,'<:br>.I \1<:br>') - unless para =~/<:code>/ + para.gsub!(/#{Mx[:gl_bullet]}\s*/,'* ') # bullet markup, marked down + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^\1^') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'[\1]') + para.gsub!(/\A\s*#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}#{Mx[:br_line]}/m,"#{Mx[:br_line]}.I \\1#{Mx[:br_line]}") + para.gsub!(/\s*#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/m,"#{Mx[:br_line]}.I \\1#{Mx[:br_line]}") + para.gsub!(/\A\s*#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}#{Mx[:br_line]}/m,"#{Mx[:br_line]}.BI \\1#{Mx[:br_line]}") + para.gsub!(/\s*#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,"#{Mx[:br_line]}.B \\1#{Mx[:br_line]}") + para.gsub!(/\s*#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,"#{Mx[:br_line]}.I \\1#{Mx[:br_line]}") + unless para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ para.gsub!(/(?:^|\s)\{(.+?)\}((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1 #{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") - para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") @manpage[:endnotes]=extract_endnotes(para) - para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)[}\]]~/,'[^\1]') # endnote marker marked up - para.gsub!(/&/,'&') - para.gsub!(/!/,'!') - para.gsub!(/#/,'#') - para.gsub!(/*/,'*') - para.gsub!(/-/,'-') - para.gsub!(///,'/') - para.gsub!(/_/,'_') - para.gsub!(/{/,'{') - para.gsub!(/}/,'}') - para.gsub!(/~/,'~') - para.gsub!(/©/,'©') + para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s*(?:.+?)#{Mx[:en_a_c]}/m,'[^\1]') # endnote marker marked up + para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s*(?:.+?)#{Mx[:en_b_c]}/m,'[^\1]') # endnote marker marked up + #para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)[}\]]~/,'[^\1]') # endnote marker marked up + para.gsub!(/#{Mx[:gl_o]}#amp#{Mx[:gl_c]}/,'&') ##{Mx[:gl_o]}#095#{Mx[:gl_c]} + para.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!') + para.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#') + para.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*') + para.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-') + para.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/') + para.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_') + para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{') + para.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}') + para.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~') + para.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©') end para.gsub!(/(^| |\s|\*)\\\*/,'\1\\\\\*') #man page requires para.gsub!(/\s\.(\S+)/,' \\.\1') para.gsub!(/(\n\.)(\S\S\S+)/m,'\1\\.\2') para.gsub!(/-/,'\-') #manpages use this para.gsub!(/~/,'~') if para #manpages use this - if para =~/<:(?:group|verse|alt|code)(?:-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/ - if para =~/<:code>/ #code-block: angle brackets special characters + if para =~/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_c]}#@dp:#@dp#{Mx[:id_c]})?/ + if para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code-block: angle brackets special characters para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _< para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_< #para.gsub!(/(?![}])_([<>])/m,'\1') # _> _< }_< end - para.gsub!(//,"\n\n.P\n\n") # watch - para.gsub!(/<:(?:group|verse|alt)(?:\\-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,'') - para.gsub!(/<:code>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,"\n\n.nf\n\n") - para.gsub!(/<:code\\-end>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,"\n\n.fi\n\n") + para.gsub!(/(?:#{Mx[:br_line]}|#{Mx[:br_nl]})+\s*/,"\n\n.BR\n\n") # watch + para.gsub!(/#{Mx[:gr_o]}(?:group|verse|alt)(?:\\-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,'') + para.gsub!(/#{Mx[:gr_o]}code#{Mx[:gr_c]}(?:\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,"\n\n.nf\n\n") + para.gsub!(/#{Mx[:gr_o]}code\\-end#{Mx[:gr_c]}(?:\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,"\n\n.fi\n\n") else - para.gsub!(//,"\n\n") #watch introduces a bug + para.gsub!(/(?:#{Mx[:br_line]}|#{Mx[:br_nl]})\s*/,"\n\n") #unless para =~/#{Mx[:lv_o]}\d:/ #watch introduces a bug end blit=para.scan(/\[[^\]]+\]|[^\[]+/) blit_array=[] blit.each do |x| if x =~/^\[/ x.gsub!(/\s+/,' \ ') #manpages use this - elsif x =~/\.(?:TP|BI)\s/ - x.gsub!(/\s+/,' \ ') #manpages use this else x end blit_array << x end - para = blit_array.join - para.gsub!(/\s\\\s+(<:br>|)/,'\1') #a messy solution - para.gsub!(/\s(\[)/,' \ \1') #manpages use this - para.gsub!(/<:p[bn]>/,'') # remove page breaks - para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'') # remove empty lines - check - para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') + para=blit_array.join + para.gsub!(/#{Mx[:gl_o]}:name#\S+?#{Mx[:gl_c]}/mi,'') #added + para.gsub!(/\s\\\s+(#{Mx[:br_line]}|#{Mx[:br_nl]})/,'\1') #a messy solution + para.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'') # remove page breaks + para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') # remove empty lines - check + para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') para.gsub!(/(.+?)<\/a>/m,'\1') - para.gsub!(/<:name#\S+?>/,'') # remove name links + para.gsub!(/#{Mx[:mk_o]}name#\S+?#{Mx[:mk_c]}/,'') # remove name links para.gsub!(/ /,' ') # decide on para.gsub!(/(["''])/,"\\\\\\1") # quotation marks need escape para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") para.gsub!(/^(?:^|[^_\\])\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') wordlist=para.scan(/\S+/) - if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers + if para =~/^#{Rx[:meta]}\s*(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; manpage_metadata(d_meta) end end - if para !~/(^0~||)/ + if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change paranum=para[@regx,3] @p_num=Format::Paragraph_number.new(paranum) end - @sto=Split_text_object.new(para).lev_segname_para_ocn + @sto=SiSU_Structure::Split_text_object.new(@md,para).txt ### problem in scroll, it appears tables are getting paragraph numbers - m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m \ and para=~/\S+/ para=case @sto.format - when /^(1)~(?:(\S+))?/ + when /^(1):(\S*)/ manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body1 - when /^(2)~(?:(\S+))?/ + when /^(2):(\S*)/ manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body2 - when /^(3)~(?:(\S+))?/ + when /^(3):(\S*)/ manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body3 - when /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object + when /^(4):(\S+)/ # work on see SiSU_text_parts::Split_text_object manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body4 - when /^(5)~(?:(\S+))?/ + when /^(5):(\S*)/ manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body5 - when /^(6)~(?:(\S+))?/ + when /^(6):(\S*)/ manpage_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body6 #when /^(i1)$/ @@ -440,17 +395,17 @@ WOK elsif para =~/#{table_message}/ @manpage[:body] << para << @br elsif para =~/(Note|Endnotes?)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ elsif para =~/(MetaData)/ \ - and para =~/<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info ####suspect visit + and para =~/#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info ####suspect visit #formatMono=MonoSiSU.new('
MetaData') #para=formatMono.bold_para elsif para.include? 'Owner Details' \ - and para !~/<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #formatMono=MonoSiSU.new('
Owner Details') #@@manpage[:owner_details]=formatMono.bold_para #para='' - elsif para =~/(¡|(.*)/ one,two=$1,$2 format_text=Format_text_object.new(one,two) @@ -461,7 +416,7 @@ WOK para='' end case para - when /<:i[1-9]>/ + when /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/ if para =~/.*<:#>.*$/m format_text=Format_text_object.new(para,'') para=format_text.scr_indent_one_no_paranum @@ -475,10 +430,14 @@ WOK format_text=Format_text_object.new(one,two) para=format_text.center end - para.gsub!(/~/,'~') if para #manpages use this - para.gsub!(/{/,'{') if para #manpages use this + para.gsub!(/\s(\[)/m,' \ \1') if para #manpages use this + para.gsub!(/(?:#{Mx[:br_line]}|#{Mx[:br_nl]})/,"\n\n") if para + para.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~') if para #manpages use this + para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{') if para #manpages use this + para.gsub!(/#{Mx[:pa_o]}\S+#{Mx[:pa_c]}/,' ') if para ## Clean Prepared Text para.gsub!(//,' ') if para ## Clean Prepared Text para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text + para end end @manpage diff --git a/lib/sisu/v0/manpage_format.rb b/lib/sisu/v0/manpage_format.rb index 978cc6b4..02fbf216 100644 --- a/lib/sisu/v0/manpage_format.rb +++ b/lib/sisu/v0/manpage_format.rb @@ -81,10 +81,10 @@ module Format def initialize(one,two,three) one.gsub!(/\.(html|pdf|php)/,'') if one =~/\.\.\/\S+/ @one,@two,@three=one,two,three - rgx=/^[1-6-]~/ + rgx=/^#{Mx[:lv_o]}[1-6-]:/ @one.gsub!(rgx,'') if @one =~rgx @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - rgx=/~\{[\d*+]+\s+(.+?)<#@dp>\}~/ + rgx=/#{Mx[:en_a_o]}[\d*+]+\s+(.+?)#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/ @one.gsub!(rgx,'\1') if @one =~rgx @link,@linkname=one,two @vz=SiSU_Env::Get_init.instance.skin diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index 1883bdba..09c67ff6 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -68,6 +68,7 @@ module SiSU_ODF require "#{SiSU_lib}/odf_format" include SiSU_ODF_format require "#{SiSU_lib}/shared_txt" + require "#{SiSU_lib}/shared_structure" @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 class Source require 'zlib' @@ -100,55 +101,6 @@ module SiSU_ODF end end private - class Split_text_object ).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - if /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,$5 - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@lev,@text,@ocn=$1,$2,$3,$4 - elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,"x#{@@alt_id_count}" - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}" - end - else - if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @text,@ocn=$1,$2 - end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ - @text=/(.+?)/im.match(@para)[1] - end - if /^((\d)~(?:~\S+)?)\s+(.+)/m.match(@para) - @format,@lev,@text=$1,$2,$3 - end - end - format=@format.dup - @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - SiSU_ODF_format::Format_text_object.new(format,@text,@ocn) - else - SiSU_ODF_format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>") - end - self - end - end class Scroll \s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - @serial=/\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>\s*/ + @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + @serial=/\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}\s*/ @tab="\t" @url_brace=SiSU_Viz::Skin.new.url_decoration @br=if @md.cmd =~/M/; "\n" @@ -176,12 +128,12 @@ module SiSU_ODF end # Used for extraction of endnotes from paragraphs def extract_endnotes(para='') - notes=para.scan(/~\{(\d+\s+.+?)\s*<#@dp>\}~/) + notes=para.scan(/#{Mx[:en_a_o]}(\d+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/) @n=[] notes.each do |n| #high cost to deal with
appropriately within odf, consider n=n.dup.to_s - if n =~/<:?br(?: \/)?>/ - fix=n.split(/<:?br(?: \/)?>/) #watch #added + if n =~/#{Mx[:br_line]}/ + fix=n.split(/#{Mx[:br_line]}/) #watch #added fix.each do |x| if x =~/\S+/; @n << x end @@ -210,8 +162,8 @@ module SiSU_ODF end def heading(para,no) para.gsub!(@serial,'') - para.gsub!(/<:name#\S+?>/,'') - para.gsub!(/^([1-6])~\S*\s/,'') + para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') + para.gsub!(/^#{Mx[:lv_o]}(\d):\S*?#{Mx[:lv_c]}\s*/,'') m=/#{$1}/ breakpage='' if @md.fns \ @@ -307,19 +259,19 @@ module SiSU_ODF def normal(para) #P1 - P3 para.gsub!(@serial,'') para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, - %{\\1\\2}) #http ftp matches escaped, no decoration + '\1\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, '\1\2\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, also works #%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) #http ftp matches with decoration para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, %{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}}) para=case para - when /^<:i([1-9])>\s/m + when /^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/m m=$1 - para.gsub!(/^<:i#{m}>\s/m,'') + para.gsub!(/^#{Mx[:pa_o]}:i#{m}#{Mx[:pa_c]}/m,'') %{#{para}} else %{#{para}} end @@ -331,24 +283,24 @@ module SiSU_ODF end def footnote(para) @astx||=10000 - para.gsub!(/<#@dp>([}\]]~)/,'\1') + para.gsub!(/#{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_a_c]}|#{Mx[:en_a_c]})/,'\1') #para.gsub!(/
<:i[1-9]>/,'
') - if para =~/~\{\d+\s+/ - para=para.gsub(/~\{(\d+)\s+(.+?)\}~/,'\1 \2') + if para =~/#{Mx[:en_a_o]}\d+\s+/ + para=para.gsub(/#{Mx[:en_a_o]}(\d+)\s+(.+?)#{Mx[:en_a_c]}/,'\1 \2') end - if para=~/~\[[*+]\d+\s/ #editor notes, squre bracket series - asterisk=para.scan(/~\[([*+]\d+)\s+(.+?)\]~/) + if para=~/#{Mx[:en_b_o]}[*+]\d+\s/ #editor notes, squre bracket series + asterisk=para.scan(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)#{Mx[:en_b_c]}/) asterisk.each do |x| a=x[0].gsub(/([*+])/,"\\\\\\1") - para=para.gsub(/~\[(#{a})\s+(.+?)\]~/,%{\\1 \\2}) + para=para.gsub(/#{Mx[:en_b_o]}(#{a})\s+(.+?)#{Mx[:en_b_c]}/,%{\\1 \\2}) @astx+=1 end end - if para=~/~\{[*+]+\s/ - asterisk=para.scan(/~\{([*+]+)\s+(.+?)\}~/) + if para=~/#{Mx[:en_a_o]}[*+]+\s/ + asterisk=para.scan(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/) asterisk.each do |x| a=x[0].gsub(/([*+])/,"\\\\\\1") - para=para.gsub(/~\{(#{a})\s+(.+?)\}~/,%{\\1 \\2}) + para=para.gsub(/#{Mx[:en_a_o]}(#{a})\s+(.+?)#{Mx[:en_a_c]}/,%{\\1 \\2}) @astx+=1 end end @@ -358,15 +310,16 @@ module SiSU_ODF para.gsub!(/&nbsp;| /,' ') para.gsub!(//,'>') para.gsub!(/<(text:span text:style-name="T[1-5]"|\/text:span)>/,'<\1>') #works, not ideal + para.gsub!(/#{Mx[:br_line]}/,'
') para.gsub!(/<br(?:\s+\/)?>/,'
') #para.gsub!(/\s\s/,'  ') para end def poem(para) #P4 #same as group para.gsub!(@serial,'') - para.gsub!(/<:verse(?:-end)?>\s*/m,'') + para.gsub!(/#{Mx[:gr_o]}verse(?:-end)?#{Mx[:gr_c]}\s*/m,'') parray=[] - para.split(/<:?br(?: \/)?>/).each do |parablock| + para.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/).each do |parablock| parablock=group_clean(parablock) parray << %{#{parablock}} if parablock =~/\S+/ end @@ -374,9 +327,9 @@ module SiSU_ODF end def group(para) #P4 #same as verse para.gsub!(@serial,'') - para.gsub!(/<:group(?:-end)?>\s*/m,'') + para.gsub!(/#{Mx[:gr_o]}group(?:-end)?#{Mx[:gr_c]}\s*/m,'') parray=[] - para.split(/<:?br(?: \/)?>/).each do |parablock| + para.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/).each do |parablock| parablock=group_clean(parablock) parablock.gsub!(/<text:a xlink:type="simple" xlink:href="(.+?)">/m,'') parablock.gsub!(/<(\/text:a)>/,'<\1>') @@ -390,21 +343,21 @@ module SiSU_ODF end def code(para) #P5 para.gsub!(@serial,'') - para.gsub!(/<:code(?:-end)?>\s*/m,'') + para.gsub!(/#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}\s*/m,'') para.gsub!(/\s\s/,'  ') parray=[] - para.split(/<:?br(?: \/)?>/).each do |parablock| + para.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/).each do |parablock| parablock=group_clean(parablock) parablock.gsub!(/^\s*$/,'
') parablock.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, - %{\\1\\2}) #http ftp matches escaped, no decoration + '\1\2') #http ftp matches escaped, no decoration parray << %{#{parablock}} if parablock =~/\S+/ end para=parray.join + '' para end def table(para) # - if para =~// + para=unless para=~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ para=if para =~/\{\s*\S+?\.(?:png|jpg|gif)\s.+?\}(?:(?:https?|file|ftp):\S+|image)/; image(para) elsif para =~/\{.+?\}(?:(?:https?|file|ftp):\S+|image)/; text_link(para) else para @@ -431,16 +384,16 @@ module SiSU_ODF when 5; heading(para,'5') << @br*2 when 6; heading(para,'6') << @br*2 end - elsif para =~ /<:verse(?:-end)?>/ + elsif para =~ /#{Mx[:gr_o]}verse(?:-end)?#{Mx[:gr_c]}/ @@odf[:body] << poem(para) @@odf[:body] << @br*2 - elsif para =~ /<:group(?:-end)?>/ + elsif para =~ /#{Mx[:gr_o]}group(?:-end)?#{Mx[:gr_c]}/ @@odf[:body] << group(para) @@odf[:body] << @br*2 - elsif para =~ /<:code(?:-end)?>/ + elsif para =~ /#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}/ @@odf[:body] << code(para) @@odf[:body] << @br*2 - elsif para =~ /<\-_&!@%~#\]\[*=$| \n+`¡]/u + safe_characters=/[^a-zA-Z0-9}{\/?,."';:)(><\-_&!@%~#\]\[*=$| \n+`#{Mx[:tc_p]}]/u dir=SiSU_Env::Info_env.new(@md.fns) @data_mod,@endnotes,@level,@cont,@copen,@odf_contents_close=Array.new(6){[]} @rcdc=false @@ -474,10 +427,10 @@ module SiSU_ODF word=para.scan(/\S+|\n/) if word word.each do |w| # _ - / # | : ! ^ ~ - unless w =~/<~\S+?;\S+?;\S+?><#@dp:#@dp>|<[:!][^<>]+?>|^<\/?(?:del|ins|sub|sup|cite)>$/ + unless w =~/#{Mx[:id_o]}~\S+?;\S+?;\S+?#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}|#{Mx[:gr_o]}.+?#{Mx[:gr_c]}|<[:!][^<>]+?>/ w.gsub!(/^<([^<>][^<>][^<>][^<>]+?)>$/,'<\1>') #refix end - unless para =~/^(?:0~|%+ )/m + unless para =~/^(?:#{Rx[:meta]}|%+ )/m w.gsub!(/&#(?:126|152);/,'~') #126 usual if w !~/&\S{1,7};/ \ or w =~/ / @@ -489,24 +442,32 @@ module SiSU_ODF para=para_array.join(' ') para=para.strip end - if para =~/<:code>/ #code-block: angle brackets special characters + if para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code-block: angle brackets special characters para.gsub!(/(^|[^}])_/m,'\1>') para.gsub!(/(^|[^}])_/m,'\1>') end - para.gsub!(/^(<:i[1-9]>\s+)?_\*\s+/,'\1 ') # bullet_09.png + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') + para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') + para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') + para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:mk_o]}[~-]##{Mx[:mk_c]}/,'') + para.gsub!(/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}\s*)?#{Mx[:gl_bullet]}/,'\1 ') # bullet_09.png #para.gsub!(/^(<:i[1-9]>\s+)?_\*\s+/,'\1 ') #bullet #para.gsub!(/^(<:i[1-9]>\s+)?_\*\s+/,'\1● ') # bullet utf8, make smaller if used #para.gsub!(/^_\*\s+/,' ') #bullet - para.gsub!(/^(<:i[1-9]>)\s+_\*\s+/,'\1 · ') #bullet - para.gsub!(/<:?br>/,'
') - para.gsub!(/<:p[bn]>/,' ') + para.gsub!(/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})\s*#{Mx[:gl_bullet]}/,'\1 · ') #bullet + para.gsub!(/^#{Mx[:gl_bullet]}/,'· ') #bullet + para.gsub!(/#{Mx[:br_line]}/,'
') + para.gsub!(/#{Mx[:br_page]}|#{Mx[:br_page_new]}/,' ') para.gsub!(/©/,'©') #too arbitrary para.gsub!(/.+?<-#>/,'') # remove dummy headings (used by html) #check - para.gsub!(/(.+?)<\/b>/,'\1') - para.gsub!(/(.+?)<\/i>/,'\1') - para.gsub!(/(.+?)<\/u>/,'\1') - para.gsub!(/(.+?)<\/sup>/,'\1') - para.gsub!(/(.+?)<\/sub>/,'\1') + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') para.gsub!(/`/,"'") para.gsub!(/­/u,'-') para.gsub!(/·/u,'*') @@ -520,10 +481,10 @@ module SiSU_ODF para.gsub!(/\44/,'$') #$ watch #para.gsub!(/^·/,'_*') #$ watch #para.gsub!(/·/,'*') #$ watch - para.gsub!(/<:p[bn]>/,'') # remove page breaks - para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>/,'') # remove empty lines + para.gsub!(/#{Mx[:br_page]}|#{Mx[:br_page_new]}/,'') # remove page breaks + para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') # remove empty lines para.gsub!(/(.+?)<\/a>/,'\1') - para.gsub!(/<:name#\S+?>/,'') # remove name links + para.gsub!(/#{Mx[:mk_o]}name#\S+?#{Mx[:mk_c]}/,'') # remove name links # para.gsub!(/(.+?)<\/a>/im,'\1') #para.gsub!(/ /,' ') # decide on #para.gsub!(/\{(\S+?\.(?:png|jpg)) .+?\}(?:http:\/\/\S+|image)/," [ \\1 ]") #"[ #{@env.url.images_local}\/\\1 ]") @@ -533,38 +494,44 @@ module SiSU_ODF #para.gsub!(/^(\{\S+?\.(?:png|jpg)\s+.+?"(.*?)"\s*\}\S+)/,"\\1 \n [image: \"\\2\"]") wordlist=para.scan(/\S+/) para=tidywords(wordlist).join(' ').strip - if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers + if para =~/^#{Rx[:meta]}(\S+)\s+(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; odf_metadata(d_meta) end end @rcdc=true if @rcdc==false \ - and (para =~/~metadata/ or para =~/1~meta\s+Document Information/) - if para !~/(^0~||)/ + and (para =~/~metadata/ or para =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_x]}\s*Document Information/) + if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change paranum=para[@regx,3] @p_num=SiSU_ODF_format::Paragraph_number.new(paranum) end - @sto=Split_text_object.new(para).lev_segname_para_ocn + @sto=SiSU_Structure::Split_text_object.new(@md,para).odt #yyyy-mm-ddT00:00:00#{ocn} #followed immediately by paragraph closure ### problem in scroll, it appears tables are getting paragraph numbers unless @rcdc - m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m \ and para=~/\S+/ para=case @sto.format - when /^(1)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2) + when /^(1):(\S*)/ + odf_structure(para,$1,@sto.ocn,$2) para - when /^(2)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2) + when /^(2):(\S*)/ + odf_structure(para,$1,@sto.ocn,$2) para - when /^(3)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2) + when /^(3):(\S*)/ + odf_structure(para,$1,@sto.ocn,$2) para - when /^(4)~(\S+)/; odf_structure(para,$1,@sto.ocn,$2) + when /^(4):(\S+)/ # work on see Split_text_object + odf_structure(para,$1,@sto.ocn,$2) # work on see SiSU_text_parts::Split_text_object para - when /^(5)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2) + when /^(5):(\S*)/ + odf_structure(para,$1,@sto.ocn,$2) para - when /^(6)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2) + when /^(6):(\S*)/ + odf_structure(para,$1,@sto.ocn,$2) para #@sto.lev_para_ocn.heading_body6 #when /^(i1)$/ @@ -610,12 +577,14 @@ module SiSU_ODF if para =~ /^(4)~(\S+)/ odf_structure(para,$1,@sto.ocn,$2) para - elsif para =~/<~(\d+);m\d+;[mdv]\d+><#@dp:#@dp>$/ + elsif para =~/#{Mx[:id_o]}~(\d+);m\d+;[mdv]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ odf_structure(para,nil,nil,nil) #watch may be problematic para end end para.gsub!(//,' ') if para ## Clean Prepared Text + para.gsub!(/#{Mx[:gr_o]}.+?#{Mx[:gr_c]}/,' ') if para ## CHECK Clean Prepared Text + para.gsub!(/#{Mx[:tc_o]}.+?#{Mx[:tc_c]}/,' ') if para ## CHECK Clean Prepared Text para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text end end @@ -668,7 +637,6 @@ module SiSU_ODF %{#@br} + %{#@br} + %{#@br} + # P1 - %{#@br} + %{#@br} + %{#@br} + diff --git a/lib/sisu/v0/odf_format.rb b/lib/sisu/v0/odf_format.rb index 3a6d0d94..05e4a918 100644 --- a/lib/sisu/v0/odf_format.rb +++ b/lib/sisu/v0/odf_format.rb @@ -81,10 +81,10 @@ module SiSU_ODF_format def initialize(one,two,three) one.gsub!(/\.(html|pdf|php)/,'') if one =~/\.\.\/\S+/ @one,@two,@three=one,two,three - rgx=/^[1-6-]~/ + rgx=/^#{Mx[:lv_o]}[1-6-]:/ @one.gsub!(rgx,'') if @one =~rgx @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - rgx=/~\{\d+\s+(.+?)<#@dp>\}~/ + rgx=/#{Mx[:en_a_o]}\d+\s+(.+?)#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/ @one.gsub!(rgx,'\1') if @one =~rgx @link,@linkname=one,two @vz=SiSU_Env::Get_init.instance.skin @@ -178,45 +178,45 @@ module SiSU_ODF_format end def table @parablock='' if @parablock =~/^<#@dp:#@dp>/,'') + @parablock.gsub!(/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') m=@parablock[//,1] @@tablefoot << m if m @parablock.gsub!(//,'') - @@tablehead=1 if @parablock =~//u; @parablock=table_head(@@table_counter,$1,true) - elsif @parablock =~//u; @parablock=table_head(@@table_counter,$1) + @@tablehead=1 if @parablock =~/#{Mx[:gr_o]}Th#{Mx[:tc_p]}/u + @@table_counter+=1 if @parablock =~/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}/u + if @parablock =~/#{Mx[:gr_o]}Th#{Mx[:tc_p]}\s+c(\d+).+?#{Mx[:tc_p]}~\d+;\w\d+;\w\d+#{Mx[:gr_c]}/u; @parablock=table_head(@@table_counter,$1,true) + elsif @parablock =~/#{Mx[:gr_o]}T#{Mx[:tc_p]}\s+c(\d+).+?#{Mx[:tc_p]}~\d+;\w\d+;\w\d+#{Mx[:gr_c]}/u; @parablock=table_head(@@table_counter,$1) end - if @parablock =~// + if @parablock =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ tablefoot=[] @@tablefoot.each {|x| tablefoot << ''} @@tablefoot=[] - if @parablock =~//; @parablock=table_end + if @parablock =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/; @parablock=table_end end end if @@tablehead == 1 - if @parablock =~/¡¡/u - if @parablock =~// - @parablock.gsub!(/!>/,table_row_close(true)) + if @parablock =~/#{Mx[:tc_c]}/ + @parablock.gsub!(/#{Mx[:tc_c]}/,table_row_close(true)) end @@tablehead=0 end @parablock else - if @parablock =~// - @parablock.gsub!(/!>/,table_row_close) + if @parablock =~/#{Mx[:tc_c]}/ + @parablock.gsub!(/#{Mx[:tc_c]}/,table_row_close) end @parablock end @@ -224,8 +224,12 @@ module SiSU_ODF_format end def table_split @new_content=[] - @one.split(/\s*[] } @doc[:fns],@doc[:fnb],@doc[:scr_suffix]='','','' @@publisher='SiSU scribe' - attr_accessor :cmd,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:sfx,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:dc_title,:html_title,:subtitle,:subtitle_tex,:creator_home,:dc_creator,:translator,:illustrator,:prepared_by,:digitized_by,:dc_subject,:dc_description,:dc_publisher,:dc_contributor,:dc_date,:dc_date_created,:dc_date_issued,:dc_date_available,:dc_date_valid,:dc_date_modified,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:dc_type,:dc_format,:dc_identifier,:dc_source,:dc_language,:language_original,:dc_relation,:dc_coverage,:dc_rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:flag_auto_heading_num,:markup,:markup_instruction,:markup_version,:markup_declared,:make_bold,:make_italic,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:creator_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:man_section,:man_name,:man_synopsis,:ec,:opt,:sem_tag + attr_accessor :cmd,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:sfx,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:dc_title,:html_title,:subtitle,:subtitle_tex,:creator_home,:dc_creator,:translator,:illustrator,:prepared_by,:digitized_by,:dc_subject,:dc_description,:dc_publisher,:dc_contributor,:dc_date,:dc_date_created,:dc_date_issued,:dc_date_available,:dc_date_valid,:dc_date_modified,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:dc_type,:dc_format,:dc_identifier,:dc_source,:dc_language,:language_original,:dc_relation,:dc_coverage,:dc_rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:flag_auto_heading_num,:markup,:markup_instruction,:markup_version,:markup_declared,:make_bold,:make_italic,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:creator_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:man_section,:man_name,:man_synopsis,:ec,:opt,:sem_tag def initialize(fns_array,opt) @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@sfx=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@dc_title=@html_title=@subtitle=@subtitle_tex=@creator_home=@dc_creator=@translator=@illustrator=@prepared_by=@digitized_by=@dc_subject=@dc_description=@dc_publisher=@dc_contributor=@dc_date=@dc_date_created=@dc_date_issued=@dc_date_available=@dc_date_valid=@dc_date_modified=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@dc_type=@dc_format=@dc_identifier=@dc_source=@dc_language=@language_original=@dc_relation=@dc_coverage=@dc_rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_auto_heading_num=@make_bold=@make_italic=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@creator_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@man_synopsis=nil @man_section=1 @@ -132,6 +132,7 @@ module SiSU_Param @markup=@markup_instruction #use @markup_instruction @doc,@fn,@make_italic,@make_bold,@tag_hash,@ec={},{},{},{},{},{},{} @flv,@lang,@seg_names,@tags,@tag_array,@tag_a,@ec[:image],@ec[:audio],@ec[:multimedia]=Array.new(9){[]} + @papersize_array=[] @rgx_image=/(?:^|[^_\\])\{\s*(\S+?\.(?:png|jpg|gif))/ @rgx_audio=/\{\s*(\S+?\.(?:mp3|ogg))/ @rgx_mm=/\{\s*(\S+?\.(?:ogg|mpeg))/ #expand and distinguish ogg @@ -419,17 +420,17 @@ module SiSU_Param @toc=[ @toc ] if @toc == String #@toc.each {|x| x.gsub!(/\{/,'\{') } #FIX~ lv1=@toc[0] ||='1~ ' #some arbitrary changes made - @lv1=/^#{lv1}\b/ + @lv1=/^#{lv1}/ lv2=@toc[1] ||='2~ ' - @lv2=/^#{lv2}\b/ + @lv2=/^#{lv2}/ lv3=@toc[2] ||='3~ ' - @lv3=/^#{lv3}\b/ + @lv3=/^#{lv3}/ lv4=@toc[3] ||='4~ ' - @lv4=/^#{lv4}\b/ + @lv4=/^#{lv4}/ lv5=@toc[4] ||='5~ ' - @lv5=/^#{lv5}\b/ + @lv5=/^#{lv5}/ lv6=@toc[5] ||='6~ ' - @lv6=/^#{lv6}\b/ + @lv6=/^#{lv6}/ when /^(?:0~(?:level|page|markup)|@(?:level|page|markup):)\s+(.+?)$/m #% processing revisit..., use syntax 0~level new=1,2,3; break=4 if para =~/(?:0~|@)(?:markup|level|page):?\s+(.+?)\Z/m page_break_str=$1 @@ -548,12 +549,12 @@ module SiSU_Param @man_synopsis= x[/synopsis=(.+)/m,1] if x =~/synopsis=.+/ end end - @lv1 ||=/^1~/ - @lv2 ||=/^2~/ - @lv3 ||=/^3~/ - @lv4 ||=/^4~/ - @lv5 ||=/^5~/ - @lv6 ||=/^6~/ + @lv1 ||=/^#{Mx[:lv_o]}1:/ + @lv2 ||=/^#{Mx[:lv_o]}2:/ + @lv3 ||=/^#{Mx[:lv_o]}3:/ + @lv4 ||=/^#{Mx[:lv_o]}4:/ + @lv5 ||=/^#{Mx[:lv_o]}5:/ + @lv6 ||=/^#{Mx[:lv_o]}6:/ else #% if para =~ /^(?:1|:?A)~/ #% processing if para=~/^:?A~/ @@ -696,6 +697,7 @@ module SiSU_Param or @mod.inspect =~/--(?:a4|letter|legal|book|a5|b5)\b/i #command line config/header override @papersize=determine_papersize(@mod.inspect) end + @papersize_array=@papersize.scan(/(?:a4|letter|legal|book|a5|b5)/i) if @sys.openssl !=false skin=if @doc_skin; SiSU_Env::Info_skin.new(@opt,@doc_skin).select else SiSU_Env::Info_skin.new(@opt).select @@ -836,12 +838,12 @@ module SiSU_Param end end @lnk=@lnk.compact if @lnk - @lv1 ||=/^1~/ - @lv2 ||=/^2~/ - @lv3 ||=/^3~/ - @lv4 ||=/^4~/ - @lv5 ||=/^5~/ - @lv6 ||=/^6~/ + @lv1 ||=/^#{Mx[:lv_o]}1:/ + @lv2 ||=/^#{Mx[:lv_o]}2:/ + @lv3 ||=/^#{Mx[:lv_o]}3:/ + @lv4 ||=/^#{Mx[:lv_o]}4:/ + @lv5 ||=/^#{Mx[:lv_o]}5:/ + @lv6 ||=/^#{Mx[:lv_o]}6:/ if @doc_skin tell=SiSU_Screen::Ansi.new(@cmd,"doc_skin <- #@doc_skin") tell.txt_grey if @cmd =~/v/ diff --git a/lib/sisu/v0/particulars.rb b/lib/sisu/v0/particulars.rb index ce12fc31..8ae4f66e 100644 --- a/lib/sisu/v0/particulars.rb +++ b/lib/sisu/v0/particulars.rb @@ -1,9 +1,10 @@ +# coding:utf-8 =begin * Name: SiSU * Description: a framework for document structuring, publishing and search - #___# + particulars, composite information about document being processed * Author: Ralph Amissah diff --git a/lib/sisu/v0/plaintext.rb b/lib/sisu/v0/plaintext.rb index 2cf26b93..b89a6252 100644 --- a/lib/sisu/v0/plaintext.rb +++ b/lib/sisu/v0/plaintext.rb @@ -68,6 +68,7 @@ module SiSU_Plaintext require "#{SiSU_lib}/plaintext_format" include Format require "#{SiSU_lib}/shared_txt" + require "#{SiSU_lib}/shared_structure" pwd=Dir.pwd @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 @@tablefoot='' @@ -111,54 +112,6 @@ module SiSU_Plaintext end end private - class Split_text_object ).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - if /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,$5 - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@lev,@text,@ocn=$1,$2,$3,$4 - elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,"x#{@@alt_id_count}" - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}" - end - else - if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @text,@ocn=$1,$2 - end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06 - @text=/(.+?)/m.match(@para)[1] - end - if /^((\d)~(?:~\S+)?)\s+(.+)/m.match(@para) - @format,@lev,@text=$1,$2,$3 - end - end - format=@format.dup - @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - Format::Format_text_object.new(format,@text,@ocn) - else - Format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>") - end - self - end - end class Scroll \s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m # 2004w18 pb pn removal added + @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m # 2004w18 pb pn removal added @tab="\t" @br=if md.mod.inspect =~ /--footnote/ \ and md.mod.inspect =~ /--dos/ @@ -198,12 +151,12 @@ module SiSU_Plaintext end # Used for extraction of endnotes from paragraphs def extract_endnotes(para='') - notes=para.scan(/~[{\[]([\d*+]+\s+.+?)\s*<#@dp>[}\]]~/) + notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) @n=[] notes.flatten.each do |n| #high cost to deal with
appropriately within plaintext, consider n=n.dup.to_s - if n =~// - fix = n.split(//) #watch #added + if n =~/#{Mx[:br_line]}|#{Mx[:br_nl]}/ + fix = n.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/) #watch #added fix.each do |x| unless x.empty?; @n << x end @@ -283,11 +236,11 @@ WOK lv=nil if lv == 0 wrapped=if para[@regx] paragraph=para[@regx,2] - if paragraph =~/<:i([1-9])>/ + if paragraph =~/#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/ m=$1.to_i - paragraph.gsub!(/<:i#{m}>/,'') + paragraph.gsub!(/#{Mx[:pa_o]}:i#{m}#{Mx[:pa_c]}/,'') util=SiSU_text_utils::Wrap.new(paragraph,78,m*2) - else util=SiSU_text_utils::Wrap.new(paragraph,78,0) + else util=SiSU_text_utils::Wrap.new(paragraph.gsub(/#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/,''),78,0) end util.line_wrap end @@ -322,83 +275,89 @@ WOK table_message='[table omitted, see other document formats]' fix=[] data.each do |para| - para.gsub!(//,'') # remove dummy headings (used by html) #check - para.gsub!(/_\*\s+/,'* ') # bullet markup, marked down - para.gsub!(/(.+?)<\/sup>/,'^\1^') - para.gsub!(/(.+?)<\/sub>/,'[\1]') - para.gsub!(/(.+?)<\/i>/,'/\1/') - para.gsub!(/(.+?)<\/b>/,'*\1*') - para.gsub!(/(.+?)<\/u>/,'_\1_') - unless para =~/<:code>/ + para.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#@br#{table_message}") + para.gsub!(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,'') # remove dummy headings (used by html) #check + para.gsub!(/#{Mx[:gl_bullet]}\s*/,'* ') # bullet markup, marked down + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*\1*') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/\1/') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'[\1]') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_\1_') + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^\1^') + para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+\1+') + para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"\1"') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'-\1-') + unless para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ para.gsub!(/\{(.+?)\}((?:https?|file|ftp):\/\/\S+|image)/,'\1 [link:] \2') - para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") para.gsub!(/_((?:https?|file|ftp):\/\/\S+)/,'\1') extract_endnotes(para) - para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)[}\]]~/,'[^\1]') # endnote marker marked up - para.gsub!(/&/,'&') - para.gsub!(/!/,'!') - para.gsub!(/#/,'#') - para.gsub!(/*/,'*') - para.gsub!(/-/,'-') - para.gsub!(///,'/') - para.gsub!(/_/,'_') - para.gsub!(/{/,'{') - para.gsub!(/}/,'}') - para.gsub!(/~/,'~') - para.gsub!(/©/,'©') + para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]') # endnote marker marked up + para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]') # endnote marker marked up + para.gsub!(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<') + para.gsub!(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>') + para.gsub!(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&') + para.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!') + para.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#') + para.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*') + para.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-') + para.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/') + para.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_') + para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{') + para.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}') + para.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~') + para.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©') end - if para =~/<:(?:group|verse|alt|code)(?:-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/ - if para =~/<:code>/ #code-block: angle brackets special characters + if para =~/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/ ##{Mx[:gr_o]}codeline#{Mx[:gr_c]} + if para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code-block: angle brackets special characters para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _< para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_< end - para.gsub!(//,"\n") # watch - para.gsub!(/<:(?:group|verse|alt|code)(?:-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,'') - else para.gsub!(//,"\n\n") # watch introduces a bug + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n") # watch + para.gsub!(/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,'') + else para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n") # watch introduces a bug end - para.gsub!(/<:p[bn]>/,'') # remove page breaks - para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'') # remove empty lines - check - para.gsub!(/(^|\s)[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') + para.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'') # remove page breaks + para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') # remove empty lines - check + para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') para.gsub!(/
(.+?)<\/a>/m,'\1') - para.gsub!(/<:name#\S+?>/,'') # remove name links + para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links para.gsub!(/ /,' ') # decide on para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') #para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') wordlist=para.scan(/\S+/) - if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers + if para =~/^#{Rx[:meta]}\s*(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; plaintext_metadata(d_meta) end end - if para !~/(^0~||)/ + if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change paranum=para[@regx,3] @p_num=Format::Paragraph_number.new(paranum) end - @sto=Split_text_object.new(para).lev_segname_para_ocn + @sto=SiSU_Structure::Split_text_object.new(@md,para).txt ### problem in scroll, it appears tables are getting paragraph numbers - m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m \ and para=~/\S+/ para=case @sto.format - when /^(1)~(?:(\S+))?/ + when /^(1):(\S*?)/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body1 - when /^(2)~(?:(\S+))?/ + when /^(2):(\S*?)/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body2 - when /^(3)~(?:(\S+))?/ + when /^(3):(\S*?)/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body3 - when /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object + when /^(4):(\S+?)/ # work on see SiSU_text_parts::Split_text_object plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body4 - when /^(5)~(?:(\S+))?/ + when /^(5):(\S*?)/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body5 - when /^(6)~(?:(\S+))?/ + when /^(6):(\S*?)/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body6 #when /^(i1)$/ @@ -427,17 +386,17 @@ WOK elsif para =~/#{table_message}/ @plaintext[:body] << para << @br elsif para =~/(Note|Endnotes?)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ elsif para =~/(MetaData)/ \ - and para =~/<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info ####suspect visit + and para =~/#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info ####suspect visit #formatMono=MonoSiSU.new('
MetaData') #para=formatMono.bold_para elsif para.include? 'Owner Details' \ - and para !~/<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #formatMono=MonoSiSU.new('
Owner Details') #@plaintext[:owner_details]=formatMono.bold_para #para='' - elsif para =~/(¡|(.*)/ one,two=$1,$2 format_text=Format_text_object.new(one,two) @@ -446,7 +405,7 @@ WOK para='' if (para =~// \ and para =~/^(-\{{2}~\d+|)/) # -endnote case para - when /<:i[1-9]>/ + when /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/ if para =~/.*<:#>.*$/m format_text=Format_text_object.new(para,'') para=format_text.scr_indent_one_no_paranum @@ -460,6 +419,7 @@ WOK format_text=Format_text_object.new(one,two) para=format_text.center end + para.gsub!(/#{Mx[:id_o]}.+?#{Mx[:id_c]}/,' ') if para ## Clean Prepared Text para.gsub!(//,' ') if para ## Clean Prepared Text para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text end diff --git a/lib/sisu/v0/plaintext_format.rb b/lib/sisu/v0/plaintext_format.rb index 1840daa6..e957d3a3 100644 --- a/lib/sisu/v0/plaintext_format.rb +++ b/lib/sisu/v0/plaintext_format.rb @@ -81,10 +81,10 @@ module Format def initialize(one,two,three) one.gsub!(/\.(html|pdf|php)/,'') if one =~/\.\.\/\S+/ @one,@two,@three=one,two,three - rgx=/^[1-6-]~/ + rgx=/^#{Rx[:lv]}/ @one.gsub!(rgx,'') if @one =~rgx @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - rgx=/~\{[\d*+]+\s+(.+?)<#@dp>\}~/ + rgx=/#{Mx[:en_a_o]}[\d*+]+\s+(.+?)#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/ @one.gsub!(rgx,'\1') if @one =~rgx @link,@linkname=one,two @vz=SiSU_Env::Get_init.instance.skin diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 106a56af..a930821e 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -130,15 +130,16 @@ module SiSU_Format_Shared @words=@words.join(' ') end def markup(para) - if para !~/^<:code>/ + if para !~/^#{Mx[:gr_o]}:code#{Mx[:gr_c]}/ if para =~/\{.+?\}((?:https?|file|ftp)\S+|image)/ wm=para.scan(/\{.+?\}(?:(?:https?|file|ftp)\S+|image)|\S+/) words=urls(wm) para.gsub!(/.+/m,words) end + para.gsub!(/#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/m,'') para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration else para.gsub!(/(^|[^}])_/m,'\1>') #code-block: angle brackets special characters para.gsub!(/(^|[^}])_/m,'\1>') @@ -175,13 +176,9 @@ GSUB @content=markup(@content) %{#{@tab*1}

\n#{@tab*2}#@content\n#{@tab*1}

\n} end - def indent1 + def indent(t) @content=markup(@content) - %{#{@tab*1}

\n#{@tab*2}#@content\n#{@tab*1}

\n} - end - def indent2 - @content=markup(@content) - %{#{@tab*1}

\n#{@tab*2}#@content\n#{@tab*1}

\n} + %{#{@tab*1}

\n#{@tab*2}#@content\n#{@tab*1}

\n} end def para_table %{

} @@ -195,40 +192,40 @@ GSUB m=parablock[//,1] @@tablefoot</,'') - @@tablehead=1 if parablock =~//, + @@tablehead=1 if parablock =~/#{Mx[:gr_o]}Th#{Mx[:tc_p]}/u + parablock.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+?#{Mx[:tc_p]}~(\d+)#{Mx[:gr_c]}/, #%{

\n} + %{}) - if parablock =~// + if parablock =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ tablefoot=[] @@tablefoot.each {|x| tablefoot << %{

#{x}

\n}} @@tablefoot=[] - parablock.gsub!(//, + parablock.gsub!(/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/, %{#{@vz.table_close}\n}) # + #%{#{tablefoot}}) end if @@tablehead == 1 - if parablock =~/¡¡/u - parablock.gsub!(/} + %{\n') - @@tablehead=0 #if parablock =~/!>/ + parablock.gsub!(/#{Mx[:tc_c]}/, '') + @@tablehead=0 end parablock else - parablock.gsub!(/} + %{\n') + parablock.gsub!(/#{Mx[:tc_c]}/, '') parablock end @new_content << parablock diff --git a/lib/sisu/v0/shared_structure.rb b/lib/sisu/v0/shared_structure.rb new file mode 100644 index 00000000..f7b78e0e --- /dev/null +++ b/lib/sisu/v0/shared_structure.rb @@ -0,0 +1,151 @@ +# coding:utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + shared document structure + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, + 2008 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_Structure + class Split_text_object + include SiSU_Viz + #include SiSU_HTML_Format_type + @@dp=nil + attr_reader :format,:text,:ocn,:scroll_lev_para_ocn,:seg_lev_para_ocn,:lev_para_ocn + def initialize(md,para) + @md,@para=md,para + @format,@ocn='null','null' + #@format,@ocn=nil,nil + @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern + end + def lev_segname_para_ocn + if @para =~/^#{Mx[:lv_o]}\d:.+?#{Mx[:id_o]}~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + if @para[/^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+?)#{Mx[:id_o]}~(\d+);(?:[ohmu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m] + @format,segname,@text,@ocn=$1,$2,$3,$4 + #@format="#@format:#{segname}" # + elsif @para[/^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+?)#{Mx[:id_o]}~(\d+);(?:[ohmu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m] + @format,@text,@ocn=$1,$2,$3 + end + else + if @para[/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}#{Mx[:gl_bullet]})\s*(.+?)#{Mx[:id_o]}~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m] + @format,@text,@ocn=$1,$2,$3,$4 #watch and work on + elsif @para[/^(#{Mx[:gl_bullet]})\s*(.+?)#{Mx[:id_o]}~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m] + @format,@text,@ocn=$1,$2,$3 + elsif @para[/#{Mx[:pa_o]}:(i[1-9])#{Mx[:pa_c]}\s*(.+?)#{Mx[:id_o]}~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m] + @format,@text,@ocn=$1,$2,$3 #consider + elsif @para[/#{Mx[:gr_o]}(code|alt|verse|group)#{Mx[:gr_c]}(.+?)#{Mx[:id_o]}~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m] + @format,@text,@ocn=$1,$2,$3 + elsif @para[/(.+?)#{Mx[:id_o]}~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m] + @text,@ocn=$1,$2 #,$3 + end + if @para !~/#{Mx[:id_o]}~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$|^$/ #added 2002w06 + @text=@para[/(.+?)/m,1] + end + if @para[/^#{Mx[:lv_o]}(\d):\S*?#{Mx[:lv_c]}\s+(.+)/m] + @format,@text=$1,$2 + end + end + @format="#@format:#{segname}" if @format =~/^[1-6]$/ + self + end + def html_seg + lev_segname_para_ocn + @seg_lev_para_ocn=if @para[/.+#{Mx[:id_o]}~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/] + SiSU_HTML_Format_type::Format_seg.new(@md,@format,@text,@ocn) + end + self + end + def html_scroll + lev_segname_para_ocn + @scroll_lev_para_ocn=if @para[/.+#{Mx[:id_o]}~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/] + SiSU_HTML_Format_type::Format_scroll.new(@md,@format,@text,@ocn) + end + self + end + def xml + require "#{SiSU_lib}/shared_xml" + lev_segname_para_ocn + @lev_para_ocn=if @para =~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + SiSU_XML_format::Format_scroll.new(@md,@format,@text,"x#@ocn") + else + SiSU_XML_format::Format_scroll.new(@md,@format,@text,"#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}") + end + self + end + def odt + lev_segname_para_ocn + @lev_para_ocn=if @para =~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + SiSU_ODF_format::Format_text_object.new(format,@text,@ocn) + else + SiSU_ODF_format::Format_text_object.new(format,@text,"#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}") + end + self + + end + def txt + lev_segname_para_ocn + #format=@format.dup + @lev_para_ocn=if @para =~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + Format::Format_text_object.new(@format,@text,@ocn) + else + Format::Format_text_object.new(@format,@text,"#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}") + end + self + end + end +end +__END__ diff --git a/lib/sisu/v0/shared_txt.rb b/lib/sisu/v0/shared_txt.rb index aaa2d4d7..484a5f05 100644 --- a/lib/sisu/v0/shared_txt.rb +++ b/lib/sisu/v0/shared_txt.rb @@ -74,7 +74,7 @@ module SiSU_text_utils line=0 out=[] out[line]='' - @para.gsub!(/<:br>/,"\n\n") + @para.gsub!(/#{Mx[:br_nl]}/,"\n\n") words=@para.scan(/\n\n|\S+/m) while words != '' word=words.shift @@ -116,7 +116,7 @@ module SiSU_text_utils end class Header_scan def initialize(md,para) - @regxcl=/<~\d+;\w\d+;\w\d+><(?:[0-9a-f]{32}|[0-9a-f]{64}):(?:[0-9a-f]{32}|[0-9a-f]{64})>/ + @regxcl=/#{Mx[:id_o]}~\d+;\w\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}(?:[0-9a-f]{32}|[0-9a-f]{64}):(?:[0-9a-f]{32}|[0-9a-f]{64})#{Mx[:id_c]}/ para=para.gsub(@regxcl,'').dup @md,@p=md,para end @@ -155,122 +155,68 @@ module SiSU_text_utils end self end - def start_is_zero + def start_is_match meta=case @p - when /^0~(title)\s+(.+?)$/; header($1,@md.dc_title,'meta','dc') #dc 1 - #when /^0~(subtitle)\s+(.+?)$/; header($1,$2) - when /^0~(creator|author)\s+(.+?)$/; header('creator',$2,'meta','dc') #dc 2 - when /^0~(subject)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 3 - when /^0~(description)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 4 - when /^0~(publisher)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 5 - when /^0~(contributor)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 6 - when /^0~(date)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 7 - when /^0~(date\.created)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(date\.issued)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(date\.available)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(date\.valid)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(date\.modified)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(type)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 8 - when /^0~(format)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 9 - when /^0~(identifier)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 10 - when /^0~(source)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 11 - when /^0~(language)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 12 - when /^0~(relation)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 13 - when /^0~(coverage)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 14 - when /^0~(rights)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 15 - when /^0~(keywords)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(copyright)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(translator|translated_by)\s+(.+?)$/; header('translator',$2,'meta','extra') - when /^0~(illustrator|illustrated_by)\s+(.+?)$/; header('illustrator',$2,'meta','extra') - when /^0~(prepared_by)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(digitized_by)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(comments?)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(abstract)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(tags?)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(catalogue)\s+(.+?)$/; header($1,$2,'meta','extra') - when /^0~(class(?:ify)?_loc)\s+(.+?)$/; header('classify_loc',$2,'meta','extra') - when /^0~(class(?:ify)?_dewey)\s+(.+?)$/; header('classify_dewey',$2,'meta','extra') - when /^0~(class(?:ify)?_pg)\s+(.+?)$/; header('classify_pg',$2,'meta','extra') - when /^0~(class(?:ify)?_isbn)\s+(.+?)$/; header('classify_isbn',$2,'meta','extra') - when /^0~(toc|structure)\s+(.+?)$/; header('structure',$2,'meta','extra') - when /^0~(toc|structure)\s+(.+?)$/; header('structure',$2,'proc','instruct') - when /^0~(level|page|markup)\s+(.+?)$/; header('markup',$2,'process','instruct') - when /^0~(bold)\s+(.+?)$/; header($1,$2,'process','instruct') - when /^0~(italics|itali[sz]e)\s+(.+?)$/; header('italicize',$2,'process','instruct') - when /^0~(vocabulary|wordlist)\s+(.+?)$/; header('vocabulary',$2,'process','instruct') - when /^0~(skin)\s+(.+?)$/; header($1,$2,'process','instruct') - when /^0~(css|stylesheet)\s+(.+?)$/; header('css',$2,'process','instruct') - when /^0~(links)\s+(.+?)$/; header($1,$2,'process','instruct') - when /^0~(prefix)\s+(.+?)$/; header($1,$2,'process','instruct') - when /^0~(suffix)\s+(.+?)$/; header($1,$2,'process','instruct') - when /^0~(information)\s+(.+?)$/; header($1,$2,'process','instruct') - when /^0~(contact)\s+(.+?)$/; header($1,$2,'process','instruct') - when /^0~(rcs|cvs)\s+(.+?)$/; header('version',$2,'process','instruct') - else nil - end - end - def start_is_at - meta=case @p - when /^@(title):\s+(.+?)$/; header($1,@md.dc_title,'meta','dc') #dc 1 + when /^#{Mx[:meta_o]}(title)#{Mx[:meta_c]}\s*(.+?)$/; header($1,@md.dc_title,'meta','dc') #dc 1 #when /^@(subtitle):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(creator|author):\s+(.+?)$/; header('creator',$2,'meta','dc') #dc 2 - when /^@(subject):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 3 - when /^@(description):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 4 - when /^@(publisher):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 5 - when /^@(contributor):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 6 - when /^@(date):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 7 - when /^@(date\.created):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(date\.issued):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(date\.available):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(date\.valid):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(date\.modified):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(type):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 8 - when /^@(format):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 9 - when /^@(identifier):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 10 - when /^@(source):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 11 - when /^@(language):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 12 - when /^@(relation):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 13 - when /^@(coverage):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 14 - when /^@(rights):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 15 - when /^@(keywords):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(copyright):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(translator|translated_by):\s+(.+?)$/; header('translator',$2) - when /^@(illustrator|illustrated_by):\s+(.+?)$/; header('illustrator',$2) - when /^@(prepared_by):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(digitized_by):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(comments?):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(abstract):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(tags?):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(catalogue):\s+(.+?)$/; header($1,$2,'meta','extra') - when /^@(class(?:ify)?_loc):\s+(.+?)$/; header('classify_loc',$2,'meta','extra') - when /^@(class(?:ify)?_dewey):\s+(.+?)$/; header('classify_dewey',$2,'meta','extra') - when /^@(class(?:ify)?_pg):\s+(.+?)$/; header('classify_pg',$2,'meta','extra') - when /^@(class(?:ify)?_isbn):\s+(.+?)$/; header('classify_isbn',$2,'meta','extra') - when /^@(toc|structure):\s+(.+?)$/; header('structure',$2,'process','instruct') - when /^@(level|page|markup):\s+(.+?)$/; header('markup',$2,'process','instruct') - when /^@(bold):\s+(.+?)$/; header($1,$2,'process','instruct') - when /^@(italics|itali[sz]e):\s+(.+?)$/; header('italicize',$2,'process','instruct') - when /^@(vocabulary|wordlist):\s+(.+?)$/; header('vocabulary',$2,'process','instruct') - when /^@(skin):\s+(.+?)$/; header($1,$2,'process','instruct') - when /^@(css|stylesheet):\s+(.+?)$/; header('css',$2,'process','instruct') - when /^@(links):\s+(.+?)$/; header($1,$2,'process','instruct') - when /^@(prefix):\s+(.+?)$/; header($1,$2,'process','instruct') #add a & b - when /^@(suffix):\s+(.+?)$/; header($1,$2,'process','instruct') - when /^@(information):\s+(.+?)$/; header($1,$2,'process','instruct') - when /^@(contact):\s+(.+?)$/; header($1,$2,'process','instruct') - when /^@(rcs|cvs):\s+(.+?)$/; header('version',$2,'process','instruct') + when /^#{Mx[:meta_o]}(creator|author)#{Mx[:meta_c]}\s*(.+?)$/; header('creator',$2,'meta','dc') #dc 2 + when /^#{Mx[:meta_o]}(subject)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 3 + when /^#{Mx[:meta_o]}(description)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 4 + when /^#{Mx[:meta_o]}(publisher)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 5 + when /^#{Mx[:meta_o]}(contributor)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 6 + when /^#{Mx[:meta_o]}(date)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 7 + when /^#{Mx[:meta_o]}(date\.created)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(date\.issued)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(date\.available)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(date\.valid)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(date\.modified)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(type)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 8 + when /^#{Mx[:meta_o]}(format)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 9 + when /^#{Mx[:meta_o]}(identifier)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 10 + when /^#{Mx[:meta_o]}(source)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 11 + when /^#{Mx[:meta_o]}(language)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 12 + when /^#{Mx[:meta_o]}(relation)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 13 + when /^#{Mx[:meta_o]}(coverage)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 14 + when /^#{Mx[:meta_o]}(rights)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','dc') #dc 15 + when /^#{Mx[:meta_o]}(keywords)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(copyright)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(translator|translated_by)#{Mx[:meta_c]}\s*(.+?)$/; header('translator',$2) + when /^#{Mx[:meta_o]}(illustrator|illustrated_by)#{Mx[:meta_c]}\s*(.+?)$/; header('illustrator',$2) + when /^#{Mx[:meta_o]}(prepared_by)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(digitized_by)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(comments?)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(abstract)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(tags?)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(catalogue)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'meta','extra') + when /^#{Mx[:meta_o]}(class(?:ify)?_loc)#{Mx[:meta_c]}\s*(.+?)$/; header('classify_loc',$2,'meta','extra') + when /^#{Mx[:meta_o]}(class(?:ify)?_dewey)#{Mx[:meta_c]}\s*(.+?)$/; header('classify_dewey',$2,'meta','extra') + when /^#{Mx[:meta_o]}(class(?:ify)?_pg)#{Mx[:meta_c]}\s*(.+?)$/; header('classify_pg',$2,'meta','extra') + when /^#{Mx[:meta_o]}(class(?:ify)?_isbn)#{Mx[:meta_c]}\s*(.+?)$/; header('classify_isbn',$2,'meta','extra') + when /^#{Mx[:meta_o]}(toc|structure)#{Mx[:meta_c]}\s*(.+?)$/; header('structure',$2,'process','instruct') + when /^#{Mx[:meta_o]}(level|page|markup)#{Mx[:meta_c]}\s*(.+?)$/; header('markup',$2,'process','instruct') + when /^#{Mx[:meta_o]}(bold)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'process','instruct') + when /^#{Mx[:meta_o]}(italics|itali[sz]e)#{Mx[:meta_c]}\s*(.+?)$/; header('italicize',$2,'process','instruct') + when /^#{Mx[:meta_o]}(vocabulary|wordlist)#{Mx[:meta_c]}\s*(.+?)$/; header('vocabulary',$2,'process','instruct') + when /^#{Mx[:meta_o]}(skin)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'process','instruct') + when /^#{Mx[:meta_o]}(css|stylesheet)#{Mx[:meta_c]}\s*(.+?)$/; header('css',$2,'process','instruct') + when /^#{Mx[:meta_o]}(links)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'process','instruct') + when /^#{Mx[:meta_o]}(prefix)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'process','instruct') #add a & b + when /^#{Mx[:meta_o]}(suffix)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'process','instruct') + when /^#{Mx[:meta_o]}(information)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'process','instruct') + when /^#{Mx[:meta_o]}(contact)#{Mx[:meta_c]}\s*(.+?)$/; header($1,$2,'process','instruct') + when /^#{Mx[:meta_o]}(rcs|cvs)#{Mx[:meta_c]}\s*(.+?)$/; header('version',$2,'process','instruct') else nil end end def dublin - out=if @p =~/^0~\S+\s/; start_is_zero - elsif @p =~/^@\S+:[+-]?\s/; start_is_at + out=if @p =~/^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/ + start_is_match else nil end end def meta - out=if @p =~/^0~\S+\s/; start_is_zero - elsif @p =~/^@\S+:[+-]?\s/; start_is_at + out=if @p =~/^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/ + start_is_match else nil end end @@ -287,30 +233,30 @@ module SiSU_text_parts_flatfile @@dl ||=SiSU_Env::Info_env.new.digest.length end def lev_segname_para_ocn - if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>.*/ - if /^([1-6])~(\S+)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para) + if @para =~/^(?:#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}|<:.+?>).+?#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}#{Mx[:id_c]}.*/ + if /^(#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]})\s*(\S.+?)#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}#{Mx[:id_c]}/m.match(@para) @format,segname,@text,@ocn=$1,$2,$3,$4 @format="#@format~#{segname}" # - elsif /^([1-6]~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para) + elsif /^(#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]})\s*(\S.+?)#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}#{Mx[:id_c]}/m.match(@para) @format,@text,@ocn=$1,$2,$3 - elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para) + elsif /<:(.+?)>\s*(\S.+?)#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}#{Mx[:id_c]}/m.match(@para) @format,@text,@ocn=$1,$2,$3 - elsif /^([1-6])~(\S+)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para) + elsif /^(#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]})\s*(\S.+?)#{Mx[:id_o]}~(0);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}#{Mx[:id_c]}/m.match(@para) @@alt_id_count+=1 @format,segname,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}" @format="#@format~#{segname}" # - elsif /^([1-6]~)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para) + elsif /^(#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]})\s*(\S.+?)#{Mx[:id_o]}~(0);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}#{Mx[:id_c]}/m.match(@para) @@alt_id_count+=1 @format,@text,@ocn=$1,$2,"x#{@@alt_id_count}" end else - if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para) + if /(.+?)#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}#{Mx[:id_c]}/m.match(@para) @text,@ocn=$1,$2 end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>|^$/ #added 2002w06 + if @para !~/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}#{Mx[:id_c]}|^$/ #added 2002w06 @text=/(.+?)/m.match(@para)[1] end - if /^(\d)~\S*\s+(.+)/m.match(@para) + if /^(#{Mx[:lv_o]}([1-9]):\S*?#{Mx[:lv_c]})\s*(.+)/m.match(@para) @format,@text=$1,$2 end end diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index c93eff5b..3c34e67f 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -59,75 +59,27 @@ =end module SiSU_text_parts - class Split_text_object + require "#{SiSU_lib}/shared_structure" + class Split_text_object < SiSU_Structure::Split_text_object require "#{SiSU_lib}/param" require "#{SiSU_lib}/xml_format" include SiSU_Viz include SiSU_XML_format @@alt_id_count=0 @@dp=nil - attr_reader :format,:text,:ocn,:lev_para_ocn - def initialize(md,para) - @md,@para=md,para - @format,@ocn='null','null' - #@format,@ocn=nil,nil - @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - end - def lev_segname_para_ocn #using shared_txt instead, watch #% watch closely - if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - if /^([1-6])~(\S+)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,segname,@text,@ocn=$1,$2,$3,$4 - @format="#@format~#{segname}" # - elsif /^([1-6]~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /^([1-6])~(\S+)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,segname,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}" - @format="#@format~#{segname}" # - elsif /^([1-6]~)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @@alt_id_count+=1 - @format,@text,@ocn=$1,$2,"x#{@@alt_id_count}" - elsif /^(?:<:i([1-9])>\s*_\*)\s+(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn="_#{$1}\*",$2,$3,$4 - elsif /^(_\*)\s+(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - elsif /<:(i[1-9])>\s*(.+?)<~(\d+);(?:[ohu]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @format,@text,@ocn=$1,$2,$3 - end - else - if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) - @text,@ocn=$1,$2 - end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06 - @text=/(.+?)/m.match(@para)[1] - end - if /^(\d)~\S*\s+(.+)/m.match(@para) - @format,@text=$1,$2 - end - end - @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - SiSU_XML_format::Format_scroll.new(@md,@format,@text,@ocn) - else - SiSU_XML_format::Format_scroll.new(@md,@format,@text,"<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>") - end - self - end def lev_segname_para - if @para =~/^(\d~|<:.+?>).+/ - if /^([1-6])~(\S+)\s+(\S.+)/m.match(@para) + if @para =~/^#{Mx[:lv_o]}(?:\d:|<:.+?>).+/ + if /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+)/m.match(@para) @format,segname,@text=$1,$2,$3 - @format="#@format~#{segname}" # - elsif /^([1-6]~)\s+(\S.+)/m.match(@para) + elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+)/m.match(@para) @format,@text=$1,$2 elsif /<:(.+?)>\s*(\S.+?)/m.match(@para) @format,@text=$1,$2 - elsif /^([1-6])~(\S+)\s+(\S.+?)/m.match(@para) + elsif /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+?)/m.match(@para) @@alt_id_count+=1 @format,segname,@text=$1,$2,$3 - @format="#@format~#{segname}" # - elsif /^([1-6]~)\s+(\S.+?)/m.match(@para) + #@format="#@format:#{segname}" # + elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+?)/m.match(@para) @@alt_id_count+=1 @format,@text=$1,$2 end @@ -135,17 +87,19 @@ module SiSU_text_parts if /(.+?)/m.match(@para) @text=$1 end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06 + if @para !~/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$|^$/ #added 2002w06 @text=/(.+?)/m.match(@para)[1] end if /^(\d)~\S*\s+(.+)/m.match(@para) @format,@text=$1,$2 end end - @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + @format="#@format:#{segname}" # +#follow this search beneath for heading_body1-6 + @lev_para_ocn=if @para =~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ SiSU_XML_format::Format_scroll.new(@md,@format,@text,@ocn) else - SiSU_XML_format::Format_scroll.new(@md,@format,@text,"<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>") + SiSU_XML_format::Format_scroll.new(@md,@format,@text,"#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}") end self end @@ -396,19 +350,40 @@ module SiSU_XML_munge def markup(para='') wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip - para.gsub!(/(^|\s+)<\s+/,'\1< '); para.gsub!(/\s+>(\s+|$)/,' >\1') - para.gsub!(/<:pb>\s*/,'') + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') + para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') + para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + para.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< '); para.gsub!(/\s+>(\s+|$)/,' >\1') + #para.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'\1') #reinstate + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') + para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') + para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + para.gsub!(/<:pb>\s*/,'') #Fix para.gsub!(/<+[-~]#>+/,'') - para.gsub!(/<0;\w\d+;[um]\d+><#@dp:#@dp>/,'') - if para !~/^<:code>/ + para.gsub!(/#{Mx[:id_o]}0;\w\d+;[um]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') + if para !~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #embeds a red-bullet image --> + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') + para.gsub!(/#{Mx[:br_page]}\s*/,'') + para.gsub!(/#{Mx[:br_page_new]}\s*/,'') + para.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); para.gsub!(/<[-~]#>/,'') para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/, %{[\\1] \\4}) para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/, %{\\1}) - para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/, + para.gsub!(/(^|#{Mx[:gl_c]}|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/, '\1\2\4') #watch, compare html_tune - para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, '\1\2') #escaped urls not linked, deal with later @@ -429,7 +404,7 @@ module SiSU_XML_munge para.gsub!(//,'
') para.gsub!(/<:pb>\s*/,'') para.gsub!(/<[-~]#>/,'') - para.gsub!(/(^|\s)&\s+/,'\1& ') #sort + para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/, "#{@dir.url.images_local}\/\\1") @@ -452,7 +427,7 @@ module SiSU_XML_munge para.gsub!(/<:\S+?>/,'') #<-- temporary para.gsub!(/<[-~]#>/,'') - para.gsub!(/(^|\s)&\s+/,'\1& ') #sort + para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/, "#{@dir.url.images_local}\/\\1") @@ -728,4 +703,3 @@ module SiSU_Tables require "#{SiSU_lib}/xml_tables" end __END__ - diff --git a/lib/sisu/v0/sst_do_inline_footnotes.rb b/lib/sisu/v0/sst_do_inline_footnotes.rb index 3ee301f2..5cfd38da 100644 --- a/lib/sisu/v0/sst_do_inline_footnotes.rb +++ b/lib/sisu/v0/sst_do_inline_footnotes.rb @@ -68,7 +68,7 @@ module SiSU_Convert_footnotes include SiSU_Env include SiSU_Param include SiSU_Viz - include Syntax + include SiSU_Syntax class Instantiate < SiSU_Param::Parameters::Instructions @@flag={} #Beware!! def initialize @@ -337,7 +337,7 @@ module SiSU_Convert_footnotes # debug 2003w46 adding revision control info if @md.flag_auto_endnotes \ and @md.flag_separate_endnotes_make - @tuned_file << "\n1~endnotes Endnotes <~0;0:0;u0>" #prob numbering, revisit + @tuned_file << "\n1~endnotes Endnotes #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}" #prob numbering, revisit end @tuned_file << "\n" @tuned_file @@ -469,7 +469,7 @@ module SiSU_Convert_footnotes end def strip_clean_of_markup(s) # used for digest, define rules, make same as in db clean s=s.dup - s=s.gsub(/(?:<\/?[ib]>|<~\d+;(?:\w|[0-6]:)\d+;\w\d+>|<#@dp:#@dp>|^:[A-C]~\S+|^[1-6]~\S+|~\{\d+\s.+?\}~)/,'') # markup and endnotes removed + s=s.gsub(/(?:<\/?[ib]>|#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}|#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}|^:[A-C]~\S+|^[1-6]~\S+|~\{\d+\s.+?\}~)/,'') # markup and endnotes removed #% same as db clean --> s=s.gsub(/(.+?)<\/del>/,'DELETED(\1)') # deletions s=s.gsub(/(\d+)<\/sup>/,'[\1]') diff --git a/lib/sisu/v0/sst_to_s_xml_dom.rb b/lib/sisu/v0/sst_to_s_xml_dom.rb index 1bf9a561..23143ed8 100644 --- a/lib/sisu/v0/sst_to_s_xml_dom.rb +++ b/lib/sisu/v0/sst_to_s_xml_dom.rb @@ -135,7 +135,7 @@ module SiSU_simple_xml_model_dom def initialize(data='',particulars='') @data,@env,@md=data,particulars.env,particulars.md @vz=SiSU_Env::Get_init.instance.skin - @regx=/^(?:(?:<:p[bn]>\s*)?(?::?[A-C]|\d~)(?:(\S+))?\s+)?(.+)/ + @regx=/^(?:#{Mx[:mk_o]}:p[bn]#{Mx[:mk_c]}\s*)?(?:#{Mx[:lv_o]}[1-9]:(\S*)#{Mx[:lv_c]})?(.+)/ @tab="\t" if @md @trans=SiSU_XML_munge::Trans.new(@md) @@ -155,13 +155,14 @@ module SiSU_simple_xml_model_dom para.gsub!(/~\[([*+])\s+(.+?)\]~/, '\1\2 ') end - def xml_markup(para='') + def xml_clean(para='') para.gsub!(/~\[([*+])\s+(.+?)\]~/, '\1\2 ') para.gsub!(/~\{([*+]+)\s+(.+?)\}~/, '\1\2 ') para.gsub!(/~\{(.+?)\}~/, '\1 ') + para.gsub!(/#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/,'') #Danger, watch end def xml_head(meta) txt=meta.text @@ -367,11 +368,14 @@ WOK para.gsub!(/ /,' ') para=SiSU_document_structure::Structure.new(@md,para).structure @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 - if para =~/\A(?:@|0~)(\S+?):?\s+(.+?)\Z/m + para.gsub!(/^0~(\S+)/,"#{Mx[:lv_o]}@\\1#{Mx[:lv_c]}") + para.gsub!(/^@(\S+?):/,"#{Mx[:lv_o]}@\\1#{Mx[:lv_c]}") + if para =~/\A#{Mx[:lv_o]}@(\S+?)#{Mx[:lv_c]}\s*(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; xml_head(d_meta) end end + para='' if para=~/#{Mx[:lv_o]}@\S+?#{Mx[:lv_c]}/ if @rcdc==false \ and (para =~/~metadata/ or para =~/^1~meta\s+Document Information/) @rcdc=true @@ -382,28 +386,28 @@ WOK if para !~/^(?:%\s|0~|@\S+?:)/ format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/ case @sto.format - when /^(1)~(\S+)?/ - xml_markup(para) + when /^(1):(\S*)/ + xml_clean(para) xml_structure($1,para,$2) para=@sto.lev_para_ocn.heading_body1 - when /^(2)~(\S+)?/ - xml_markup(para) + when /^(2):(\S*)/ + xml_clean(para) xml_structure($1,para,$2) para=@sto.lev_para_ocn.heading_body2 - when /^(3)~(\S+)?/ - xml_markup(para) + when /^(3):(\S*)/ + xml_clean(para) xml_structure($1,para,$2) para=@sto.lev_para_ocn.heading_body3 - when /^(4)~(\S+)?/ - xml_markup(para) + when /^(4):(\S*)/ + xml_clean(para) xml_structure($1,para,$2) para=@sto.lev_para_ocn.heading_body4 - when /^(5)~(\S+)?/ - xml_markup(para) + when /^(5):(\S*)/ + xml_clean(para) xml_structure($1,para,$2) para=@sto.lev_para_ocn.heading_body5 - when /^(6)~(\S+)?/ - xml_markup(para) + when /^(6):(\S*)?/ + xml_clean(para) xml_structure($1,para,$2) para=@sto.lev_para_ocn.heading_body6 #when /^(i1)$/i @@ -427,19 +431,20 @@ WOK # end else if para =~ /<:verse>/ - poem_structure(para) + para=poem_structure(para) elsif para =~ /<:group>/ - group_structure(para) + para=group_structure(para) elsif para =~ /<:code>/ para.gsub!(//,'>') - code_structure(para) + para=code_structure(para) elsif para =~/} << "\n" @@ -450,11 +455,11 @@ WOK end elsif para =~/(Note|Endnotes?)/ elsif para =~/(MetaData)/ \ - and para =~/<~\d+;[m]\d+;\w\d+><#{@@dp}:#{@@dp}>$/ #debug 2003w46 add rc info + and para =~/#{Mx[:id_o]}~\d+;[m]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#{@@dp}:#{@@dp}#{Mx[:id_c]}$/ #debug 2003w46 add rc info format_scroll=Format_scroll.new(@md,'
MetaData') para=format_scroll.bold_para elsif para =~/(Owner Details)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#{@@dp}:#{@@dp}>$/ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#{@@dp}:#{@@dp}#{Mx[:id_c]}$/ format_scroll=Format_scroll.new(@md,'
Owner Details') @@xml[:owner_details]=format_scroll.bold_para para='' @@ -484,10 +489,10 @@ WOK format_text=Format_text_object.new(one,two) para=format_text.center end - else end para.gsub!(/<:\S+?>/,'') para.gsub!(//,' ') + para end end @content_flag=true @@ -563,6 +568,7 @@ WOK para.gsub!(/_\{(.+?)\}_/,'\1') para.gsub!(/-\{(.+?)\}-/,'\1') # do { links }http://url + #para.gsub!(/#{Mx[:gl_o]}\S+?#{Mx[:gl_c]}/,'') #Danger, watch para.gsub!(/<:\S+?>/,'') para.gsub!(//,'') para="#{para}\n" unless para.empty? diff --git a/lib/sisu/v0/sst_to_s_xml_node.rb b/lib/sisu/v0/sst_to_s_xml_node.rb index 697a7a77..ed7f4b10 100644 --- a/lib/sisu/v0/sst_to_s_xml_node.rb +++ b/lib/sisu/v0/sst_to_s_xml_node.rb @@ -142,7 +142,7 @@ module SiSU_simple_xml_model_node def initialize(data='',particulars='') @data,@env,@md=data,particulars.env,particulars.md @vz=SiSU_Env::Get_init.instance.skin - @regx=/^(?:(?:<:p[bn]>\s*)?(?::?[A-C]~|\d~)(?:(\S+))?\s+)?(.+)/ + @regx=/^(?:#{Mx[:mk_o]}:p[bn]#{Mx[:mk_c]}\s*)?(?:#{Mx[:lv_o]}[1-9]:(\S*)#{Mx[:lv_c]})?(.+)/ @tab="\t" if @md @trans=SiSU_XML_munge::Trans.new(@md) @@ -159,16 +159,16 @@ module SiSU_simple_xml_model_node end protected def embedded_endnotes(para='') - para.gsub!(/~\{(.+?)\}~/,'\1 ') - para.gsub!(/~\[([*+])\s+(.+?)\]~/,'\2 ') + para.gsub!(/#{Mx[:en_a_o]}(.+?)#{Mx[:en_a_c]}/,'\1 ') + para.gsub!(/#{Mx[:en_b_o]}([*+])\s+(.+?)#{Mx[:en_b_c]}/,'\2 ') end def xml_head(meta) txt=meta.text - txt.gsub!(/\/{(.+?)}\//,'\1') - txt.gsub!(/[*!]{(.+?)}[*!]/,'\1') - txt.gsub!(/_{(.+?)}_/,'\1') - txt.gsub!(/-{(.+?)}-/,'\1') - txt.gsub!(//,'
') + txt.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + txt.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + txt.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + txt.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + txt.gsub!(/#{Mx[:br_line]}|#{Mx[:br_paragraph]}/,'
') txt.gsub!(/ & /,' and ') @@xml[:head] <<< @@ -273,13 +273,13 @@ WOK else puts 'error' end end - def node_structure(o='',para='',lv='',hname='') #extracted endnotes - if o.ocn - lv=lv.to_i - lv=nil if lv == 0 - build_relationships(o,lv) - end - end + #def node_structure(o='',para='',lv='',hname='') #extracted endnotes + # if o.ocn + # lv=lv.to_i + # lv=nil if lv == 0 + # build_relationships(o,lv) + # end + #end def xml_structure(o='',para='',lv='',hname='') #extracted endnotes if o.ocn puts para if lv and @md.cmd =~/M/ @@ -397,6 +397,10 @@ WOK @@xml[:body] << "#{@tab*0}" << "\n" #if para[@regx] @endnotes=[] end + def xml_clean(para) + para.gsub!(/#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/,'') + para + end def markup data=[] @data=@data.join.split("\n\n") @@ -404,7 +408,7 @@ WOK @data.each do |para| data << SiSU_document_structure::Structure.new(@md,para).structure end - data=Syntax::Markup.new(@md,data).songsheet + data=SiSU_Syntax::Markup.new(@md,data).songsheet data=SiSU_document_structure::Tables.new(@md,data).tables obj=SiSU_document_structure::OCN.new(@md,data).ocn obj.compact! @@ -429,58 +433,69 @@ WOK para=SiSU_sem::Tags.new(para,@md).rm.all para=@trans.markup_light(para) @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 - if para =~/\A(?:@|0~)(\S+?):?\s+(.+?)\Z/m # for headers + if para =~/\A#{Mx[:lv_o]}@(\S+?)#{Mx[:lv_c]}\s*(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; xml_head(d_meta) end end end end + #obj.each do |o| + # para=o.txt unless o.txt =~/^%% / #comments are lost, consider + # if para + # para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + # para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + # para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + # para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + # para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_paragraph]}/,'
') + # if @rcdc==false \ + # and (para =~/~metadata/ or para =~/^1~meta\s+Document Information/) + # if para !~/(^0~|^@\S+?:|^\s*$||)/ + # @rcdc=true + # end + # @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para + # unless @rcdc + # format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/ + # case @sto.format + # when /^(1):(\S*)/ + # node_structure(o,para,$1,$2) + # when /^(2):(\S*)/ + # node_structure(o,para,$1,$2) + # when /^(3):(\S*)/ + # node_structure(o,para,$1,$2) + # when /^(4):(\S*)/ # work on see Split_text_object + # node_structure(o,para,$1,$2) + # when /^(5):(\S*)/ + # node_structure(o,para,$1,$2) + # when /^(6):(\S*)/ + # node_structure(o,para,$1,$2) + # else + # if para =~ /<:verse>/ + # node_structure(o,para) + # elsif para =~ /<:group>/ + # node_structure(o,para) + # elsif para =~ /<:code>/ + # node_structure(o,para) + # elsif para =~/|)/ - @rcdc=true - end - @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para - unless @rcdc - format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/ - case @sto.format - when /^(1)~(\S+)?/ - node_structure(o,para,$1,$2) - when /^(2)~(\S+)?/ - node_structure(o,para,$1,$2) - when /^(3)~(\S+)?/ - node_structure(o,para,$1,$2) - when /^(4)~(\S+)?/ # work on see Split_text_object - node_structure(o,para,$1,$2) - when /^(5)~(\S+)?/ - node_structure(o,para,$1,$2) - when /^(6)~(\S+)?/ - node_structure(o,para,$1,$2) - else - if para =~ /<:verse>/ - node_structure(o,para) - elsif para =~ /<:group>/ - node_structure(o,para) - elsif para =~ /<:code>/ - node_structure(o,para) - elsif para =~/\1') + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_paragraph]}/,'
') if @rcdc==false \ and (para =~/~metadata/ \ or para =~/^1~meta\s+Document Information/) @@ -491,22 +506,28 @@ WOK unless @rcdc format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/ case @sto.format - when /^(1)~(\S+)?/ + when /^(1):(\S*)/ + xml_clean(para) xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body1 - when /^(2)~(\S+)?/ + when /^(2):(\S*)/ + xml_clean(para) xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body2 - when /^(3)~(\S+)?/ + when /^(3):(\S*)/ + xml_clean(para) xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body3 - when /^(4)~(\S+)?/ # work on see Split_text_object + when /^(4):(\S*)/ # work on see Split_text_object + xml_clean(para) xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body4 - when /^(5)~(\S+)?/ + when /^(5):(\S*)/ + xml_clean(para) xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body5 - when /^(6)~(\S+)?/ + when /^(6):(\S*)/ + xml_clean(para) xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body6 else @@ -519,6 +540,7 @@ WOK para.gsub!(/>/,'>') code_structure(o,para) elsif para =~//,'') para.gsub!(//,'') para="#{para}\n" unless para.empty? diff --git a/lib/sisu/v0/sst_to_s_xml_sax.rb b/lib/sisu/v0/sst_to_s_xml_sax.rb index e7765d33..4b3ca7c8 100644 --- a/lib/sisu/v0/sst_to_s_xml_sax.rb +++ b/lib/sisu/v0/sst_to_s_xml_sax.rb @@ -137,7 +137,7 @@ module SiSU_simple_xml_model_sax def initialize(data='',particulars='') @data,@env,@md=data,particulars.env,particulars.md @vz=SiSU_Env::Get_init.instance.skin - @regx=/^(?:(?:<:p[bn]>\s*)?(?::?[A-C]~|\d~)(?:(\S+))?\s+)?(.+)/ + @regx=/^(?:#{Mx[:mk_o]}:p[bn]#{Mx[:mk_c]}\s*)?(?:#{Mx[:lv_o]}[1-9]:(\S*)#{Mx[:lv_c]})?(.+)/ @tab="\t" if @md @trans=SiSU_XML_munge::Trans.new(@md) @@ -206,7 +206,7 @@ WOK @@xml[:body] << "#{@tab*1}" << "\n" if para[@regx] @@xml[:body] << if lv; %{#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n} << "\n" elsif wrapped =~/\A%%?\s+/; %{\n} # comments - else %{#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n} # main text, contents, body KEEP + else %{#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n} # main text, contents, body KEEP end #@@xml[:body] << "#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n" # main text, contents, body KEEP @@xml[:body] << "#{@endnotes}" if @endnotes # main text, endnotes KEEP @@ -252,9 +252,12 @@ WOK x.gsub!(/&/,'&') unless x =~/&\S+;/ end end + def xml_clean(para) + para.gsub!(/#{Mx[:gl_o]}[1-9]:\S*?#{Mx[:gl_c]}/,'') #Danger, watch + para + end def markup data=[] - #@data=@data.join.split("\n\n") dir=SiSU_Env::Info_env.new(@md.fns) xml_sc(@md) @endnotes,@level,@cont,@copen,@xml_contents_close=[],[],[],[],[] @@ -262,7 +265,7 @@ WOK (0..6).each { |x| @cont[x]=@level[x]=false } (4..6).each { |x| @xml_contents_close[x]='' } @data.each do |para| - data << SiSU_document_structure::Structure.new(@md,para).structure + data << SiSU_document_structure::Structure.new(@md,para).structure #takes on Mx marks end data.each do |para| if para !~/^\s*(?:%+ |<:code>)/ @@ -275,11 +278,14 @@ WOK end para=@trans.markup_light(para) @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 - if para =~/\A(?:@|0~)(\S+?):?\s+(.+?)\Z/m # for headers + para.gsub!(/^0~(\S+)/,"#{Mx[:lv_o]}@\\1#{Mx[:lv_c]}") + para.gsub!(/^@(\S+?):/,"#{Mx[:lv_o]}@\\1#{Mx[:lv_c]}") + if para =~/\A#{Mx[:lv_o]}@(\S+?)#{Mx[:lv_c]}\s*(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; xml_head(d_meta) end end + para='' if para=~/#{Mx[:lv_o]}@\S+?#{Mx[:lv_c]}/ if @rcdc==false \ and (para =~/~metadata/ or para =~/^1~meta\s+Document Information/) @rcdc=true @@ -287,24 +293,30 @@ WOK if para !~/(^0~|^@\S+?:|^\s*$||)/ @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para unless @rcdc - format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/ + format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[1-9]|null/ case @sto.format - when /^(1)~(\S+)?/ + when /^(1):(\S*)/ + xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body1 - when /^(2)~(\S+)?/ + when /^(2):(\S*)/ + xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body2 - when /^(3)~(\S+)?/ + when /^(3):(\S*)/ + xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body3 - when /^(4)~(\S+)?/ # work on see Split_text_object + when /^(4):(\S*)/ # work on see Split_text_object + xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body4 - when /^(5)~(\S+)?/ + when /^(5):(\S*)/ + xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body5 - when /^(6)~(\S+)?/ + when /^(6):(\S*)/ + xml_clean(para) xml_structure(para,$1,$2) para=@sto.lev_para_ocn.heading_body6 #when /^(i1)$/i @@ -328,17 +340,18 @@ WOK # end else if para =~ /<:verse>/ - poem_structure(para) + para=poem_structure(para) elsif para =~ /<:group>/ - group_structure(para) + para=group_structure(para) elsif para =~ /<:code>/ para.gsub!(//,'>') - code_structure(para) + para=code_structure(para) + #elsif para =~/#{Mx[:gr_o]}Th?.+/ # tables come as single block #work area 2005w13 elsif para =~//,'') para.gsub!(//,'') ## Clean Prepared Text #bugwatch reinstate + para end + para end 6.downto(4) do |x| y=x - 1; v=x - 3 @@ -419,6 +434,7 @@ WOK def xml @sisu=[] @data.each do |para| + #para.gsub!(/#{Mx[:gl_o]}\S+?#{Mx[:gl_c]}/,'') #Danger, watch para.gsub!(/<:\S+?>/,'') para.gsub!(//,'') para="#{para}\n" unless para.empty? diff --git a/lib/sisu/v0/sysenv.rb b/lib/sisu/v0/sysenv.rb index 3d01729e..a259d991 100644 --- a/lib/sisu/v0/sysenv.rb +++ b/lib/sisu/v0/sysenv.rb @@ -380,7 +380,8 @@ module SiSU_Env @@flv ||=document_language_versions_found[:f] unless @@fns =~/\S+?\.zip/ @@fnz ||=if @@fns =~/(?:\~\S{2,3})?\.(?:ssm\.sst|ssm)$/; @@fnb + '.ssm.zip' - else @@fnb + '.sst.zip' + elsif @fnb; @@fnb + '.sst.zip' + else '' # e.g. termsheet end end else m=/(.+?)\.((?:-|ssm\.)?sst|ssm)$/ @@ -491,6 +492,16 @@ module SiSU_Env :xhtml => filename(code,'scroll','.xhtml'), :pdf_l => filename(code,'landscape','.pdf'), :pdf_p => filename(code,'portrait','.pdf'), + :pdf_l_a4 => filename(code,'landscape.a4','.pdf'), + :pdf_p_a4 => filename(code,'portrait.a4','.pdf'), + :pdf_l_a5 => filename(code,'landscape.a5','.pdf'), + :pdf_p_a5 => filename(code,'portrait.a5','.pdf'), + :pdf_l_b5 => filename(code,'landscape.b5','.pdf'), + :pdf_p_b5 => filename(code,'portrait.b5','.pdf'), + :pdf_l_letter => filename(code,'landscape.letter','.pdf'), + :pdf_p_letter => filename(code,'portrait.letter','.pdf'), + :pdf_l_legal => filename(code,'landscape.legal','.pdf'), + :pdf_p_legal => filename(code,'portrait.legal','.pdf'), :toc => filename(code,'toc','.html'), :doc => filename(code,'doc','.html'), :index => filename(code,'index','.html'), @@ -665,14 +676,8 @@ module SiSU_Env end @texpdf end - def latex2pdf(md=nil) #convert from latex to pdf + def latex2pdf(md,papersize='a4') #convert from latex to pdf tell=((@cmd =~/[MVv]/) ? '' : '> /dev/null' ) - papersize='a4' - papersize=if md and defined? md.papersize and not md.papersize.empty? \ - and md.papersize =~/(?:a4|letter|legal|book|a5|b5)\b/ - md.papersize - else papersize - end mode='batchmode' #mode='nonstopmode' program_ref="\n\t\tSee http://www.tug.org/applications/pdftex/\n\t\tOn Debian this is is included in tetex-extra" @@ -2045,8 +2050,8 @@ WOK is end def papersize # paper settings, default overidden in param if set within document - if defined? @rc['default']['papersize']; @rc['default']['papersize'].downcase - else defaults[:papersize].downcase + if defined? @rc['default']['papersize']; @rc['default']['papersize'].downcase + else defaults[:papersize].downcase end end def odf_structure @@ -3130,6 +3135,11 @@ WOK dest="#{@env.path.webserv}/_sisu/image" cp_images(src,dest) end + def cp_webserver_images_local #this should not have been necessary + src=@env.path.image_source + dest="#{@env.path.webserv}/#{@env.path.stub_pwd}/_sisu/image" + cp_images(src,dest) + end def cp_base_images #fix images src="#{@env.path.share}/image" dest="#{@env.path.webserv}/_sisu/image_sys" diff --git a/lib/sisu/v0/texinfo.rb b/lib/sisu/v0/texinfo.rb index 2b10c04c..adcba123 100644 --- a/lib/sisu/v0/texinfo.rb +++ b/lib/sisu/v0/texinfo.rb @@ -160,11 +160,11 @@ module SiSU_TexInfo data.each do |para| # DEBUG 2003w16 this is a kludge, because i could not get parameters # from param, Sort out ... revert to more elegant solution - if para =~ //,'') - para.gsub!(/(^|\s)\{(.+?)\}((?:https?|file):\/\/\S+)/,'\1(\2 [linked to:] \3)') + para.gsub!(/(^|#{Mx[:gl_c]}|\s)\{(.+?)\}((?:https?|file):\/\/\S+)/,'\1(\2 [linked to:] \3)') do_mono=TexInfoFormat::Texinfo.new(para,@md) @tex_file << do_mono.spec_char end @@ -176,10 +176,10 @@ module SiSU_TexInfo @tex_file=[] data.each do |para| # BUG bug -> have problems with endnotes in headers - if para =~ /\\~@\{\d+\s+/ #if para =~ /@\}\\~/m,' @footnote{ \1} ') - elsif para =~ /\\~@\{\*+\s+/ #if para =~ /@\}\\~/m,' @footnote{ \1} ') + if para =~ /#{Mx[:en_a_o]}\d+\s+/ #if para =~ // + if para =~ /#{Mx[:gr_o]}code#{Mx[:gr_c]}/ @@flag['code']=true @@counting=1 end - if para =~ /<:verse>/ + if para =~ /#{Mx[:gr_o]}verse#{Mx[:gr_c]}/ @@flag['poem']=1 end if @@flag['code'] if @@flag['code'] \ - and para =~ /<:code[-_](?:end|close)>/ #watch change not tested 200501 + and para =~ /#{Mx[:gr_o]}code[-_](?:end|close)#{Mx[:gr_c]}/ #watch change not tested 200501 @@flag['code']=false end if @@flag['code'] \ @@ -211,7 +211,7 @@ module SiSU_TexInfo end elsif @@flag['poem']==1 if @@flag['poem']==1 \ - and para =~ /<:verse[-_](?:end|close)>/ #watch change not tested 200501 + and para =~ /#{Mx[:gr_o]}verse[-_](?:end|close)#{Mx[:gr_c]}/ #watch change not tested 200501 @@flag['poem']=0 end if @@flag['poem']==1 \ @@ -229,7 +229,7 @@ module SiSU_TexInfo data=@data data.each do |line| if line =~ /\S/ \ - and line !~ /<:(code|verse).+/ + and line !~ /#{Mx[:gr_o]}(code|verse).+/ if @@flag['code'] line.gsub!(/^\s*(.+)/m, "\\noindent \\marginpar\[left-text\]{\\begin{tiny}#{@@counting}\\end{tiny}}\\1\\") @@counting+=1 if @@flag['code'] @@ -243,7 +243,7 @@ module SiSU_TexInfo @tex_file=[] @@tableheader=0 data.each do |para| - if para =~ /¡|\s*/,' ') - if para =~ /^[1-3]\\+~\S*(.+?)\s*$/ + if para =~ /^#{Mx[:lv_o]}[1-3]:\S*?#{Mx[:lv_c]}\s*(.+?)\s*$/ toc=TexInfoFormat::Texinfo.new($1,@md) texinfo_menu << toc.menu - elsif para =~ /^[4-6]\\+~(?:\S+)?\s+(.+?)\s*$/ + elsif para =~ /^#{Mx[:lv_o]}[4-6]:\S*?#{Mx[:lv_c]}\s*(.+?)\s*$/ toc=TexInfoFormat::Texinfo.new($1,@md) texinfo_menu << toc.menu case para - when /^[4]\\+~\S+\s+(.+?)\s*$/ + when /^#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}\s*(.+?)\s*$/ n_menu+=1 @submenu[n_menu]=[] - when /^[5]\\+~\S+\s+(.+?)\s*$/ + when /^#{Mx[:lv_o]}5:\S+?#{Mx[:lv_c]}\s*(.+?)\s*$/ n_submenu+=1 @subsubmenu[n_menu]=[] @submenu[n_menu] << toc.menu - when /^[6]\\+~\S+\s+(.+?)\s*$/ + when /^#{Mx[:lv_o]}6:\S+?#{Mx[:lv_c]}\s*(.+?)\s*$/ n_submenu+=1 @subsubmenu[n_submenu]=[] @subsubmenu[n_submenu] << toc.menu @@ -299,14 +299,14 @@ module SiSU_TexInfo data.each do |para| mono=TexInfoFormat::Texinfo.new(para,@md) case para - when /^1\\+/; mono.level1 - when /^2\\+/; mono.level2 - when /^3\\+/; mono.level3 - when /^4\\+~/ + when /^#{Mx[:lv_o]}1:/; mono.level1 + when /^#{Mx[:lv_o]}2:/; mono.level2 + when /^#{Mx[:lv_o]}3:/; mono.level3 + when /^#{Mx[:lv_o]}4:/; mono.level4 n_menu+=1 @@do_submenu,@@do_subsubmenu=1,1 - when /^5\\+/ + when /^#{Mx[:lv_o]}5:/; n_submenu+=1 @@do_subsubmenu=1 if @@do_submenu==1 @@ -315,7 +315,7 @@ module SiSU_TexInfo @@do_submenu=0 else mono.level5 end - when /^6\\+/ + when /^#{Mx[:lv_o]}6:/; if @@do_submenu==1 menu=TexInfoFormat::Texinfo.new(@subsubmenu[n_menu],@md) para="#{menu.subsubmenu}#{mono.level6}" @@ -339,9 +339,7 @@ module SiSU_TexInfo if para !~/\S/ para=nil else - para.gsub!(/<\\~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'<\1>') - #para.gsub!(/<\\~(\d+);\w\d+;\w\d+><#@dp:#@dp>/,'<\1>') - #para.gsub!(/<\\~(\d+);\w\d+;\w\d+><#@dp:#@dp>/,'<\1>') + para.gsub!(/#{Mx[:id_o]}\\~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'<\1>') para end end @@ -369,7 +367,7 @@ module SiSU_TexInfo no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3); data.each do |para| if @md.markup =~ /num_top/i \ - and para !~ /0\\+/ + and para !~ /#{Rx[:meta]}/ if para =~ /^[1-6]\\+(?:~\S+)?\s*/ \ and para !~ /<:\d-endnotes>/ header=para[//m, 1].gsub!(/-/m,'.') @@ -390,9 +388,9 @@ module SiSU_TexInfo @tex_file=[] paranumber=0 data.each do |para| - if para =~/<\\~(\d+);\w\d+;\w\d+><#@dp:#@dp>/ \ - and para !~ // - parablock,paranum=/(.+?)<\\~(\d+);\w\d+;\w\d+><#@dp:#@dp>/im.match(para)[1,2] + if para =~/#{Mx[:id_o]}\\~(\d+);\w\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/ \ + and para !~ /#{Mx[:br_eof]}/ + parablock,paranum=/(.+?)#{Mx[:id_o]}\\~(\d+);\w\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/im.match(para)[1,2] do_duo=TexInfoFormat::DuoTex.new(parablock,paranum) ###is BUG para=do_duo.paraNum if parablock end diff --git a/lib/sisu/v0/texinfo_format.rb b/lib/sisu/v0/texinfo_format.rb index 5bcdf04d..42d62960 100644 --- a/lib/sisu/v0/texinfo_format.rb +++ b/lib/sisu/v0/texinfo_format.rb @@ -237,7 +237,7 @@ WOK "@bye" end def clean(para) - para.gsub!(/<\\~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'<\1>') + para.gsub!(/#{Mx[:id_o]}\\~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'<\1>') para.gsub!(/\s*[,:]\s*/,' - ') para.gsub!(/<:#>/,'') para.strip! @@ -249,21 +249,21 @@ WOK "* #{para}::" end def level1 - @para.gsub!(/[1]\\+~/,'') + @para.gsub!(/#{Mx[:lv_o]}1:\S*?#{Mx[:lv_c]}\s*/,'') para=clean(@para) nd=para.gsub(/@footnote\{.+?\}\s+/,'') para="@node #{nd}\n@unnumbered #{para}\n@cindex chapter, #{nd}\n" @para.gsub!(/.+/,"#{para}") end def level2 - @para.gsub!(/[2]\\+~/,'') + @para.gsub!(/#{Mx[:lv_o]}2:\S*?#{Mx[:lv_c]}\s*/,'') para=clean(@para) nd=para.gsub(/@footnote\{.+?\}\s+/,'') para="@node #{nd}\n@unnumbered #{para}\n@cindex chapter, #{nd}\n" @para.gsub!(/.+/,"#{para}") end def level3 - @para.gsub!(/[3]\\+\~/,'') + @para.gsub!(/#{Mx[:lv_o]}3:\S*?#{Mx[:lv_c]}\s*/,'') para=clean(@para) nd=para.gsub(/@footnote\{.+?\}\s+/,'') #para=para.gsub(/(.+?)\s*(@footnote\{.+?\})\s*(.+)$/,"\\1 \\3\n\\2") @@ -272,21 +272,21 @@ WOK @para.gsub!(/.+/,"#{para}") end def level4 - @para.gsub!(/[4]\\+~\S+/,'') + @para.gsub!(/#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}\s*/,'') para=clean(@para) nd=para.gsub(/@footnote\{.+?\}\s+/,'') para="@node #{nd}\n@unnumbered #{para}\n@cindex chapter, #{nd}\n" @para.gsub!(/.+/,"#{para}") end def level5 - @para.gsub!(/[5]\\+~\S+/,'') + @para.gsub!(/#{Mx[:lv_o]}5:\S*?#{Mx[:lv_c]}\s*/,'') para=clean(@para) nd=para.gsub(/@footnote\{.+?\}\s+/,'') para="@node #{nd}\n@unnumbered #{para}\n@cindex chapter, #{nd}\n" @para.gsub!(/.+/,"#{para}") end def level6 - @para.gsub!(/[6]\\+~\S+/,'') + @para.gsub!(/#{Mx[:lv_o]}6:\S*?#{Mx[:lv_c]}\s*/,'') para=clean(@para) nd=para.gsub(/@footnote\{.+?\}\s+/,'') para="@node #{nd}\n@unnumbered #{para}\n@cindex chapter, #{nd}\n" @@ -313,16 +313,16 @@ WOK @para.gsub!(/<:i2>(.*)/,'\1') end def spec_char # special characters - @para.gsub!(//i,'') - @para.gsub!(/©/,'(c)') - @para.gsub!(/&(lt|#060);/,'<'); @para.gsub!(/&(gt|#062);/,'>') - @para.gsub!(/{/,'{'); @para.gsub!(/}/,'}') - @para.gsub!(/&#(126|152);/i,'~') - @para.gsub!(/!/,'!') - @para.gsub!(/#/,'#') - @para.gsub!(/*/,'*') - @para.gsub!(///,'/') - @para.gsub!(/_/,'_') + @para.gsub!(/#{Mx[:br_eof]}/i,'') + @para.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'(c)') + @para.gsub!(/#{Mx[:gl_o]}#(?:lt|060)#{Mx[:gl_c]}/,'<'); @para.gsub!(/#{Mx[:gl_o]}(gt|#062)#{Mx[:gl_c]}/,'>') + @para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'); @para.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}') + @para.gsub!(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~') + @para.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!') + @para.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#') + @para.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*') + @para.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/') + @para.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_') @para.gsub!(/&atild;<\/font><\/sup>/,' ') @para.gsub!(/\\/,'\\backslash ') @para.gsub!(/<:pb>/,'\\newpage') @@ -375,9 +375,9 @@ WOK def longtable @end_table="\\end{longtable}" @row_break='\\\\\\' - if @para[//ui] + if @para[/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s+c(\d+);(.+?)#{Mx[:gr_c]}/ui] #CHECK !> closure no_of_cols,cols_width=$1,$2 - @@tableheader=1 if @para =~ //u,"#{@@start_table}") + @para.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s+c\d+?;.+#{Mx[:gr_c]}/u,"#{@@start_table}") end - if @para =~// - @para.gsub!(//," #@end_table") + if @para =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ + @para.gsub!(/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/," #@end_table") end - @para.gsub!(//,1] @para.gsub!(/\/,'') - @para.gsub!(/¡\d+?¡(.+?)(?:¡|!)/u, + @para.gsub!(/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|!)/u, "{\\begin{tiny} {\\bfseries \\1}\\end{tiny}}&") @para.gsub!(/&>\s*$/, " #@row_break \\hline\\endhead #@row_break") @@ -409,8 +409,8 @@ WOK @@number_of_cols=0 end else - if @para =~/¡\d+?¡(.+?)(?:¡|!)/u - @para.gsub!(/¡\d+?¡(.+?)(?:¡|!)/u,"\\begin{tiny}\\1\\end{tiny}&") + if @para =~/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|!)/u + @para.gsub!(/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|!)/u,"\\begin{tiny}\\1\\end{tiny}&") @para.gsub!(/&>\s*$/," #@row_break") end end @@ -423,7 +423,7 @@ WOK @end_table="\\end{tabular}" @row_break='\\\\\\\\' @break_page="#@row_break\n#@row_break \n" - if @para[//ui] + if @para[/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s+c(\d+);(.+?)#{Mx[:gr_c]}/ui] no_of_cols,cols_width=$1,$2 @w=cols_width.split(/;\s+/) @colW=[] @@ -432,13 +432,13 @@ WOK @colW << "p{#{col_w}cm}" if col_w end @@start_table="\\begin{tabular}{#@colW}\n" - @para.gsub!(//u,"#{@@start_table}") + @para.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s+c\d+?;.+#{Mx[:gr_c]}/u,"#{@@start_table}") end - if @para =~// - @para.gsub!(//,"#@end_table") + if @para =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ + @para.gsub!(/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/,"#@end_table") @@table_pg_break_counter=1 end - if @para =~//] @para.gsub!(/\/,'') end end - if @para =~/¡\d+?¡(.+?)(?:¡|!)/u - @para.gsub!(/¡\d+?¡(.+?)(?:¡|!)/u,"\\begin{tiny}\\1\\end{tiny}&") + if @para =~/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|!)/u + @para.gsub!(/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|!)/u,"\\begin{tiny}\\1\\end{tiny}&") @para.gsub!(/&>\s*$/,"#@row_break") end @para diff --git a/lib/sisu/v0/texpdf.rb b/lib/sisu/v0/texpdf.rb index 1d37b588..8d951dfb 100644 --- a/lib/sisu/v0/texpdf.rb +++ b/lib/sisu/v0/texpdf.rb @@ -72,7 +72,7 @@ module SiSU_TeX @@tabular="{tabular}" @@column_instruct=@@squigle_close=@@tex_line_mode=@@tex_word_mode=@@line_mode='' @@tex_debug_counter=@@table_pagebreak_counter=@@tex_footnote_call_counter=@@tex_table_flag=@@tex_counter=@@tex_column=@@tex_columns=@@tex_columns=@@counting=0 - @@tex_pattern_margin_number="\\\\begin\\\{tiny\\\}~\\\\end\\\{tiny\\\}\\\{\\\\marginpar.+?\s+" + @@tex_pattern_margin_number=/\\\\begin\\\{tiny\\\}~\\\\end\\\{tiny\\\}\\\{\\\\marginpar.+?\s+/ #@@tex_pattern_margin_number="\\\\marginpar.+?\s+" @@n=@@tableheader=@@rights=nil @@date ||=SiSU_Env::Info_date.new @@ -116,12 +116,10 @@ module SiSU_TeX tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.program.pdf_viewer} #{path}/#{@md.fnb}/#{@md.fn[:pdf_p]}") tell.flow if @opt.cmd =~/[MVv]/ @md=@particulars.md - my_make=SiSU_Env::Create_file.new(@opt.cmd,@opt.fns) + SiSU_Env::Create_file.new(@opt.cmd,@opt.fns) $flag=@md.cmd #introduced to pass 0 for no object citation numbers... to texpdf_format @dir_o="#{@env.path.output}/#{@opt.fnb}" directories - filename_index="#{@md.txt_path}/pdf" - newfilename=filename_index #% needed needs to be reprogrammed !!! SiSU_Env::Info_skin.new(@md).select dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here @@ -160,33 +158,49 @@ module SiSU_TeX def initialize(md,env) @md,@env=md,env end - def latex_do(texfilename) + def latex_do(texfilename,papersize) @texfilename=texfilename @@n_lpdf=@@n_lpdf+1 - tell=SiSU_Screen::Ansi.new(@md.cmd,'portrait ->') + tell=SiSU_Screen::Ansi.new(@md.cmd,"#{papersize} portrait ->") tell.dark_grey_title_hi if @md.cmd =~/[MVv]/ tex_fn_base=@texfilename.gsub(/\.tex$/,'') cmd=SiSU_Env::System_call.new("#{tex_fn_base}.tex",'',@md.cmd) tell=SiSU_Screen::Ansi.new(@md.cmd) tell.grey_open unless @md.cmd =~/q/ - 2.times { |i| cmd.latex2pdf(@md) unless ("#{tex_fn_base}.tex" !~/\w+/) } + if "#{tex_fn_base}" =~/\w+/ \ + and "#{papersize}" =~/\w+/ + 2.times { |i| cmd.latex2pdf(@md,papersize) } + end tell.p_off unless @md.cmd =~/q/ - tell=SiSU_Screen::Ansi.new(@md.cmd,'landscape ->') + tell=SiSU_Screen::Ansi.new(@md.cmd,"#{papersize} landscape ->") tell.dark_grey_title_hi if @md.cmd =~/[MVv]/ cmd=SiSU_Env::System_call.new("#{tex_fn_base}.landscape.tex",'',@md.cmd) tell.grey_open - 2.times { |i| cmd.latex2pdf(@md) unless ("#{tex_fn_base}.landscape.tex" !~/\w+/) } + if "#{tex_fn_base}" =~/\w+/ \ + and "#{papersize}" =~/\w+/ + 2.times { |i| cmd.latex2pdf(@md,papersize) } + end tell.p_off unless @md.cmd =~/q/ pwd=Dir.pwd portrait_pdf="#{pwd}/#{tex_fn_base}.pdf" landscape_pdf="#{pwd}/#{tex_fn_base}.landscape.pdf" + case papersize + when /a4/; pdf_p=@md.fn[:pdf_p_a4]; pdf_l=@md.fn[:pdf_l_a4] + when /a5/; pdf_p=@md.fn[:pdf_p_a5]; pdf_l=@md.fn[:pdf_l_a5] + when /b5/; pdf_p=@md.fn[:pdf_p_b5]; pdf_l=@md.fn[:pdf_l_b5] + when /letter/; pdf_p=@md.fn[:pdf_p_letter]; pdf_l=@md.fn[:pdf_l_letter] + when /legal/; pdf_p=@md.fn[:pdf_p_legal]; pdf_l=@md.fn[:pdf_l_legal] + else pdf_p=@md.fn[:pdf_p_a4]; pdf_l=@md.fn[:pdf_l_a4] + end if FileTest.file?(portrait_pdf) - cp(portrait_pdf,"#@dir_sisu/#{@md.fnb}/#{@md.fn[:pdf_p]}") + cp(portrait_pdf,"#@dir_sisu/#{@md.fnb}/#{pdf_p}") rm(portrait_pdf) + else p "here #{__FILE__} #{__LINE__} NOT FOUND: #{portrait_pdf}" if @md.cmd.inspect =~/M/ end if FileTest.file?(landscape_pdf) - cp(landscape_pdf,"#@dir_sisu/#{@md.fnb}/#{@md.fn[:pdf_l]}") + cp(landscape_pdf,"#@dir_sisu/#{@md.fnb}/#{pdf_l}") rm(landscape_pdf) + else p "here #{__FILE__} #{__LINE__} NOT FOUND: #{landscape_pdf}" if @md.cmd.inspect =~/M/ end tell=SiSU_Screen::Ansi.new(@md.cmd,@@n_lpdf,'processed (SiSU LaTeX to pdf - using pdfetex aka. pdftex or pdflatex)') tell.generic_number unless @md.cmd =~/q/ @@ -198,25 +212,50 @@ module SiSU_TeX @tex_f_no=0 info={} if FileTest.file?(@env.source_file_with_path) - if @md.fns =~/\.(?:-|ssm\.)?sst$/ - @dirout=SiSU_Env::Info_env.new(@md.fns) - case @md.fns - when /\.(?:-|ssm\.)?sst$/ - if FileTest.directory?(@env.path.tex)==true - Dir.chdir(@env.path.tex) - @dir_sisu=@dirout.path.output - texfile=@md.fns.gsub(/$/,'.tex') - texfile=texfile.gsub(/~/,'-') - if File.exist?(texfile) \ - and File.size(texfile) > 0 - @tex_f_no+=1 - latex_do(texfile) - else - puts "\tzero file size #{@env.path.tex}/#{@md.fns}" + @md.papersize_array.each do |ps| + if @md.fns =~/\.(?:-|ssm\.)?sst$/ + @dirout=SiSU_Env::Info_env.new(@md.fns) + case @md.fns + when /\.(?:-|ssm\.)?sst$/ + if FileTest.directory?(@env.path.tex)==true + Dir.chdir(@env.path.tex) + @dir_sisu=@dirout.path.output + texfile=@md.fns.gsub(/$/,".#{ps}.tex") + texfile=texfile.gsub(/~/,'-') + if File.exist?(texfile) \ + and File.size(texfile) > 0 + @tex_f_no+=1 + latex_do(texfile,ps) + else + puts "\tzero file size #{@env.path.tex}/#{texfile}" + end end end end end + outputdir="#@dir_sisu/#{@md.fnb}" + case @md.papersize_array[0] #default pdf + when /a4/; pdf_p=@md.fn[:pdf_p_a4]; pdf_l=@md.fn[:pdf_l_a4] + when /a5/; pdf_p=@md.fn[:pdf_p_a5]; pdf_l=@md.fn[:pdf_l_a5] + when /b5/; pdf_p=@md.fn[:pdf_p_b5]; pdf_l=@md.fn[:pdf_l_b5] + when /letter/; pdf_p=@md.fn[:pdf_p_letter]; pdf_l=@md.fn[:pdf_l_letter] + when /legal/; pdf_p=@md.fn[:pdf_p_legal]; pdf_l=@md.fn[:pdf_l_legal] + else pdf_p=@md.fn[:pdf_p_a4]; pdf_l=@md.fn[:pdf_l_a4] + end + if FileTest.file?("#{outputdir}/#{pdf_p}") + system(" + cd #{outputdir} + rm portrait.pdf + ln -s #{pdf_p} portrait.pdf + ") + end + if FileTest.file?("#{outputdir}/#{pdf_l}") + system(" + cd #{outputdir} + rm landscape.pdf + ln -s #{pdf_l} landscape.pdf + ") + end else tell=SiSU_Screen::Ansi.new(@md.cmd,"FILE NOT FOUND: << #{@md.fns} >> - requested latex system processing skipped") tell.warn @@ -232,7 +271,14 @@ module SiSU_TeX #include SiSU_Param @@tex_backslash ||="\\\\" @@tilde='\\\\\\~' #?? debug crazy - @@tex_head={ :p => nil, :l => nil } + @@tex_head={ + 'a4'=>{:p => nil, :l => nil}, + 'a5'=>{:p => nil, :l => nil}, + 'b5'=>{:p => nil, :l => nil}, + 'letter'=>{:p => nil, :l => nil}, + 'legal'=>{:p => nil, :l => nil}, + 'book'=>{:p => nil, :l => nil} + } @@flag_alt,@@flag_group,@@flag_code=false,false,false @@dp,@@prefix_b=nil,nil def initialize(particulars) @@ -260,30 +306,30 @@ module SiSU_TeX tell.txt_grey unless @md.cmd =~/q/ if @md.dc_rights use=@md.dc_rights.dup #dup is necessary, else contents of :rights changed - do_mono=SiSU_TeX_Pdf::Format_text_object.new(@md,use) + sp_char=SiSU_TeX_Pdf::Special_characters.new(@md,use) copymark=if @md.creator_copymark; '{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} ' else '' end copymark='Copyright {\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} ' - copyright=do_mono.special_characters_safe.gsub(/^\s*Copyright \(C\)/, copymark) + copyright=sp_char.special_characters_safe.gsub(/^\s*Copyright \(C\)/, copymark) @@rights||="\n #{@@tex_backslash*2}[3]\\ \\linebreak #{copyright}" end if @md.prefix_b - do_mono=SiSU_TeX_Pdf::Format_text_object.new(@md,@md.prefix_b) - prefix_b=do_mono.special_characters_safe + sp_char=SiSU_TeX_Pdf::Special_characters.new(@md,@md.prefix_b) + prefix_b=sp_char.special_characters_safe @@prefix_b="\n #{@@tex_backslash*2}[3]\\ \\linebreak \\ #{prefix_b}\n" unless @@prefix_b end data=pre(data) data=footnote(data) if @md.flag_tables - data=tables(data) + data=tables(data) end ocn=if @md.markup.inspect =~/no_ocn/ \ or @md.mod.inspect =~/--no-ocn/ \ or @skin_no_ocn false - else true - end + else true + end data=number_paras(data,ocn) data=markup(data) output(data) @@ -298,22 +344,22 @@ module SiSU_TeX # DEBUG 2003w16 this is a kludge, because i could not get parameters # from param, Sort out ... revert to more elegant solution # even more of a kludge as had to insert newlines where code is used not satisfactory, think about - para=if para =~/<:br>|\n/; para.split(/<:br>|\n/) + para=if para =~/#{Mx[:br_nl]}|\n/; para.split(/#{Mx[:br_nl]}|\n/) else para end if para.class == String - @md.flag_tables=true if para =~/\\\}\\~/m,"\\footnote[\\1]{%\n \\2} ") #removed space before \\footnote 2004w21, watch - para.gsub!(/\\~\[([*+]\d+)\s+(.+?)<#@dp>\]\\~/m,"\\FootnoteA{\\1}{%\n \\2} ") #work on asterisk footnotes - para.gsub!(/\\~\\\{([*+]+)\s+(.+?)<#@dp>\\\}\\~/m,"\\FootnoteA{\\1}{%\n \\2} ") #work on asterisk footnotes + if para =~/#{Mx[:en_a_o]}[\d*+]+\s|#{Mx[:en_b_o]}([*+]\d+)\s/ # note escape not necessary in front of ~ has implications for many other matches #debug note + para.gsub!(/#{Mx[:en_a_o]}(\d+)\s+(.+?)#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/m,"\\footnote[\\1]{%\n \\2} ") #removed space before \\footnote 2004w21, watch + para.gsub!(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/m,"\\FootnoteA{\\1}{%\n \\2} ") #work on asterisk footnotes + para.gsub!(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/m,"\\FootnoteA{\\1}{%\n \\2} ") #work on asterisk footnotes end @tex_file << para end @tex_file end def tables_hash(md,para) - para_p=para.dup #visit - do_mono_l=SiSU_TeX_Pdf::Format_text_object.new(md,para) - do_mono_p=SiSU_TeX_Pdf::Format_text_object.new(md,para_p) - block={ - :l => do_mono_l.longtable_landscape, - :p => do_mono_p.longtable_portrait - } + @block={} + @para=para + @md.papersize_array.each do |ps| + @@tableheader={ ps => { :p => 0, :l => 0 }} + para,para_p=@para.dup,@para.dup #visit + format_l=SiSU_TeX_Pdf::Format_text_object.new(md,para,ps) + format_p=SiSU_TeX_Pdf::Format_text_object.new(md,para_p,ps) + @block[ps]={ + :l => format_l.longtable_landscape, + :p => format_p.longtable_portrait + } + end + @block end def tables(data) - @@tableheader={ :p => 0, :l => 0 } @tex_file=[] data.each do |para| - @tex_file << if para =~/¡|/ \ + if para =~/#{Mx[:gr_o]}(?:code|alt|verse|group)#{Mx[:gr_c]}/ \ or @@flag_alt - if para =~/<:(?:code|alt|verse|group)>/ + if para =~/#{Mx[:gr_o]}(?:code|alt|verse|group)#{Mx[:gr_c]}/ @lineone=case para - when /<:(?:alt|verse|group)>/; para - when /<:code>/; "#{@tex.paraskip_small} \\begin{scriptsize} " + para + when /#{Mx[:gr_o]}(?:alt|verse|group)#{Mx[:gr_c]}/; para + when /#{Mx[:gr_o]}code#{Mx[:gr_c]}/; "#{@tex.paraskip_small} \\begin{scriptsize} " + para else 'error' #should never occur end end if para =~/<=curly/ #takes care of escaped curly braces, expand - do_mono=SiSU_TeX_Pdf::Format_text_object.new(@md,para) - para=do_mono.special_characters_curly(para) + sp_char=SiSU_TeX_Pdf::Special_characters.new(@md,para) + para=sp_char.special_characters_curly(para) end - regx=/<:((?:code|alt|verse|group)(?:-end)?)>/m + regx=/#{Mx[:gr_o]}((?:code|alt|verse|group)(?:-end)?)#{Mx[:gr_c]}/m + x=nil x=regx.match(para)[1] if para =~regx - x=$1 - para.gsub!(/\n<:(?:code|alt|verse|group)>\n/m,'') + para.gsub!(/\n#{Mx[:gr_o]}(?:code|alt|verse|group)#{Mx[:gr_c]}\n/m,'') para=enclose(para,'code') unless para =~/^$/ if x =~/(?:alt|verse|group)/; @@flag_alt=true if x =~/group/; @@flag_group=true end elsif x =~/code/; @@flag_alt,@@flag_code=true,true - elsif @@flag_alt; + elsif @@flag_alt if para =~ /\}(?:https?|file|ftp)/m - para=mono.http #(@orientation) + para=SiSU_TeX_Pdf::Format_text_object.new(@md,para,'a4').http end @group_collect << para #<< "\n\n" end @@ -400,67 +451,84 @@ module SiSU_TeX group_collect=[] group_collect << '\begin{footnotesize} ' unless @@flag_code @group_collect.each do |x| - x.gsub!(/(<:\S+>||)/,' ') + x.gsub!(/(<:\S+>|#{Mx[:id_o]}.*?#{Mx[:id_c]}|#{Mx[:tc_o]}.*?#{Mx[:tc_c]}|#{Mx[:gr_o]}.*?#{Mx[:gr_c]}||)/,' ') #Mx fix x=x.split(/ \\\\ /) group_collect << x end group_collect << ' \end{footnotesize} ' unless @@flag_code @group_collect=group_collect.flatten - @lineone.gsub!(/(<:\S+>||)/,' ') + @lineone.gsub!(/(<:\S+>|#{Mx[:fa_o]}.*?#{Mx[:fa_c]}|#{Mx[:gr_o]}.*?#{Mx[:gr_c]}||)/,' ') #@group_collect.each{ |x| x.gsub!(/(.#{@@tilde}\S*\s*|<:\S+>||)/,' ') } - #@lineone.gsub!(/(.#{@@tilde}\S*\s*|<:\S+>||)/,' ') insert=[] - if para =~/<:code-end>/m + insert=if para =~/#{Mx[:gr_o]}code-end#{Mx[:gr_c]}/m insert << y + @lineone << @group_collect << ' \end{scriptsize}' << " #{@tex.paraskip_normal}" else insert << y + @lineone << @group_collect end + para.gsub!(/(<:\S+>|#{Mx[:fa_o]}.*?#{Mx[:fa_c]}|#{Mx[:gr_o]}.*?#{Mx[:gr_c]}||)/,' ') @@flag_alt,@@flag_group,@@flag_code=false,false,false @group_collect=[] - para.gsub!(/(<:\S+>||)/,' ') - #para.gsub!(/(.#{@@tilde}\S*\s*|<:\S+>||)/,' ') #@tex_file << insert.flatten - tex_f = insert.flatten + tex_f=insert.flatten end + para=tex_f else - if para =~ /\}(?:https?|file|ftp)/ - para=mono.http #(@orientation) - end + #if para =~ /\}(?:https?|file|ftp)/ + # para=mono.http #(@orientation) + #end case para - when /^1#{@@tilde}/; mono.level1 - when /^2#{@@tilde}/; mono.level2 - when /^3#{@@tilde}/; mono.level3 - when /^4#{@@tilde}/; mono.level4 - when /^5#{@@tilde}/; mono.level5 - when /^6#{@@tilde}/; mono.level6 - when /^<:i([1-9])>/; mono.indent($1) - when /<:=/; mono.symbol_graphic #watch - when /^\s*<:image\s+/; mono.image - when /\}image/; mono.png + when /^#{Mx[:lv_o_1]}/; mono.level1 + when /^#{Mx[:lv_o_2]}/; mono.level2 + when /^#{Mx[:lv_o_3]}/; mono.level3 + when /^#{Mx[:lv_o_4]}/; mono.level4 + when /^#{Mx[:lv_o_5]}/; mono.level5 + when /^#{Mx[:lv_o_6]}/; mono.level6 + when /^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/; mono.indent($1) + when /<:=/; mono.symbol_graphic #watch + when /^\s*<:image\s+/; mono.image + when /\}image/; mono.png else para.strip! para=enclose(para) unless para =~/^$/ end - para.gsub!(/(\.#{@@tilde}\S*\s*|<:\S+>||)/,' ') #% tread with care + para.gsub!(/\s*(?:#{Mx[:br_line]}|#{Mx[:br_nl]})\s*/,' \\\\\\\\ ') #% tread with care + para.gsub!(/(\.#{@@tilde}\S*\s*|<:\S+>|#{Mx[:fa_o]}.*?#{Mx[:fa_c]}|#{Mx[:gr_o]}.*?#{Mx[:gr_c]}||)/,' ') #% tread with care #para.gsub!(/(.#{@@tilde}(?:\\~\S+)?\s*|<:\S+>||)/,' ') #KEEP reference, problem escaping open curly braces \{ if para =~/<=curly/ #takes care of escaped curly braces, expand - do_mono=SiSU_TeX_Pdf::Format_text_object.new(@md,para) - para=do_mono.special_characters_curly(para) + sp_char=SiSU_TeX_Pdf::Special_characters.new(@md,para) + para=sp_char.special_characters_curly(para) + end + para + end + if para =~ /\}(?:https?|file|ftp)/ + para=if para !~/\.(?:png|jpg|gif)/ + SiSU_TeX_Pdf::Format_text_object.new(@md,para,'a4').http + else + @block={} + @md.papersize_array.each do |ps| + image=SiSU_TeX_Pdf::Format_text_object.new(@md,para,ps).http + @block[ps]={ + :l => image, + :p => image + } + end + @block end - tex_f = para end - tex_f + para end def markup(data) @tex_file=[] md={} #% document headers watch special characters in title & subtitle: glark "\{\{~(sub)?title.+?[$&%#_\{\}]" *.(lm|er)?? - home=@vz.txt_home.gsub(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>|\\\\/,' - ') #no line splitting in heading neither html nor latex - title=@md.title.gsub(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>|\\\\/,' - ') #no line splitting in heading neither html nor latex - subtitle=@md.subtitle.gsub(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>|\\\\/,' - ') if @md.subtitle #no line splitting in heading neither html nor latex - orient_portrait=SiSU_TeX_Pdf::Format_text_object.new(@md,'portrait',"#{home}: - #{title} #{subtitle}") - orient_landscape=SiSU_TeX_Pdf::Format_text_object.new(@md,'landscape',"#{home}: - #{title} #{subtitle}") - @@tex_head[:p]=orient_portrait.document_head_with_orientation - @@tex_head[:l]=orient_landscape.document_head_with_orientation + home=@vz.txt_home.gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}|#{Mx[:br_paragraph]}|\\\\/,' - ') #no line splitting in heading neither html nor latex + title=@md.title.gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}|#{Mx[:br_paragraph]}|\\\\/,' - ') #no line splitting in heading neither html nor latex + subtitle=@md.subtitle.gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}|#{Mx[:br_paragraph]}|\\\\/,' - ') if @md.subtitle #no line splitting in heading neither html nor latex + @md.papersize_array.each do |ps| + orient_portrait=SiSU_TeX_Pdf::Format_head.new(@md,ps,'portrait',"#{home}: - #{title} #{subtitle}") + orient_landscape=SiSU_TeX_Pdf::Format_head.new(@md,ps,'landscape',"#{home}: - #{title} #{subtitle}") + @@tex_head[ps][:p]=orient_portrait.document_head_with_orientation + @@tex_head[ps][:l]=orient_landscape.document_head_with_orientation + end @tex_file <<< markup_common(para[:p]), - :l => markup_common(para[:l]) - } + if ( para['a4'] or para['a5'] or para['b5'] \ + or para['letter'] or para['legal']) + @md.papersize_array.each do |ps| + if para[ps] + if (para[ps][:p] and para[ps][:l]) + para[ps]={ + :p => markup_common(para[ps][:p]), + :l => markup_common(para[ps][:l]) + } + else p "here #{__FILE__} #{__LINE__}" if @md.cmd.inspect =~/M/ + end + end + end + elsif (para[:p] and para[:l]) + para = { + :p => markup_common(para[:p]), + :l => markup_common(para[:l]) + } + else p "here #{__FILE__} #{__LINE__}" if @md.cmd.inspect =~/M/ + end end @tex_file << para end @@ -553,7 +637,7 @@ WOK url=%<#{l[:url]}> url.gsub!(/(?:\\)*([$&~%_#}{^])/,"\\\\\\1") #latex special chars s_lnk=l[:say] - s_lnk.gsub!(/(
|
|

|

)/,'') + s_lnk.gsub!(/\s*(#{Mx[:br_line]}|#{Mx[:br_nl]}|#{Mx[:br_paragraph]})\s*/,' \\\\\\\\ ') s_lnk.gsub!(/(?:\\)*([$&~%_#}{^])/,"\\\\\\1") #latex special chars if url !~/^\.(\.)?\// s_lnk_url=%<\\begin{scriptsize}\\href{#{url}}{#{url}}\\end{scriptsize}> # note this bit of dereferencing magic @@ -589,77 +673,116 @@ WOK @tex_file << "\n\\end{document}" end def number_paras_numbering(para,ocn) - if para =~/<\\~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ \ - and para !~/\\end\{longtable\}|/ #catch - m=/(.+?)<\\~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m + if para =~/#{Mx[:id_o]}\\~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ \ + and para !~/\\end\{longtable\}|#{Mx[:br_eof]}/ #catch + m=/(.+?)#{Mx[:id_o]}\\~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m parablock=para[m,1] paranum=if ocn; para[m,2] else '' end do_duo=SiSU_TeX_Pdf::Format_text_object.new(@md,parablock,paranum) para=do_duo.para_num if parablock - elsif para =~/^<~\d+;(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ #2005 this is added for tables, rationalise - m=/<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m + elsif para =~/^#{Mx[:id_o]}~\d+;(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #2005 this is added for tables, rationalise + m=/#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m paranum=para[m,1] - para.gsub!(/<~\d+;(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>/,'') + para.gsub!(/#{Mx[:id_o]}~\d+;(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') para="\\marginpar{\\begin{tiny}#{paranum}\\end{tiny}}" + para - elsif para =~/^<~\d+;(?:[oh]|[0-6]:)\d+;\w\d+>/ #extra 2005 this is added for tables, rationalise - m=/<~(\d+);[oh]\d+;\w\d+>/m + elsif para =~/^#{Mx[:id_o]}~\d+;(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}/ #extra 2005 this is added for tables, rationalise + m=/#{Mx[:id_o]}~(\d+);[oh]\d+;\w\d+#{Mx[:id_c]}/m paranum=para[m,1] - para.gsub!(/<~\d+;(?:[oh]|[0-6]:)\d+;\w\d+>/,'') + para.gsub!(/#{Mx[:id_o]}~\d+;(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}/,'') para="\\marginpar{\\begin{tiny}#{paranum}\\end{tiny}}" + para elsif para =~/\\end\{longtable\}/ #catch - para.gsub!(/<\\~\d+>|<\\~(\d+);(?:[ohm]|[0-6]:)\d+;\w\d+><#@dp:#@dp>/,'') - para.gsub!(/<\\~\d+>|<\\~(\d+);(?:[ohm]|[0-6]:)\d+;\w\d+>/,'') #extra + para.gsub!(/#{Mx[:id_o]}\\~\d+#{Mx[:id_c]}|#{Mx[:id_o]}\\~(\d+);(?:[ohm]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') + para.gsub!(/#{Mx[:id_o]}\\~\d+#{Mx[:id_c]}|#{Mx[:id_o]}\\~(\d+);(?:[ohm]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}/,'') #extra end para end def number_paras(data,ocn) tex_file=[] data.each do |para| - para=if para.class == Hash \ - and (para[:p] and para[:l]) - para = { - :p => number_paras_numbering(para[:p],ocn), - :l => number_paras_numbering(para[:l],ocn) - } + para=if para.class == Hash + if ( para['a4'] or para['a5'] or para['b5'] \ + or para['letter'] or para['legal']) + @para={} + @md.papersize_array.each do |ps| + if para[ps] + if (para[ps][:p] and para[ps][:l]) + @para[ps]={ + :p => number_paras_numbering(para[ps][:p],ocn), + :l => number_paras_numbering(para[ps][:l],ocn) + } + else p "here #{__FILE__} #{__LINE__}" if @md.cmd.inspect =~/M/ + end + end + end + elsif (para[:p] and para[:l]) + @para = { + :p => number_paras_numbering(para[:p],ocn), + :l => number_paras_numbering(para[:l],ocn) + } + else p "here #{__FILE__} #{__LINE__}" if @md.cmd.inspect =~/M/ + end else #elsif para.class == String - number_paras_numbering(para,ocn) + @para=number_paras_numbering(para,ocn) end - tex_file << para + tex_file << @para end tex_file end def output(array) array.flatten! array.compact! + @array=array fns_l=@md.fns.gsub(/~/,'-') #this is a sorry fix, but necessary as it appears latex programs like not ~ - filename_tex_landscape=File.new("#{@env.path.tex}/#{fns_l}.landscape.tex",'w+') - filename_tex_portrait=File.new("#{@env.path.tex}/#{fns_l}.tex",'w+') - filename_tex_portrait << @@tex_head[:p] - filename_tex_landscape << @@tex_head[:l] - array.each do |para| - case para - when String - para.gsub!(/^\s+/,'') - if para !~/\A\s*\Z/ - filename_tex_portrait.puts para,"\n" - filename_tex_landscape.puts para,"\n" - end - when Hash - para[:p].gsub!(/^\s+/,'') - para[:l].gsub!(/^\s+/,'') - if para[:p] !~/\A\s*\Z/ - filename_tex_portrait.puts para[:p],"\n" - end - if para[:l] !~/\A\s*\Z/ - filename_tex_landscape.puts para[:l],"\n" + @md.papersize_array.each do |ps| + filename_tex_landscape=File.new("#{@env.path.tex}/#{fns_l}.#{ps}.landscape.tex",'w+') + filename_tex_portrait=File.new("#{@env.path.tex}/#{fns_l}.#{ps}.tex",'w+') + filename_tex_portrait << @@tex_head[ps][:p] + filename_tex_landscape << @@tex_head[ps][:l] + array.each do |para| + case para + when String + para.gsub!(/^\s+/,'') + if para !~/\A\s*\Z/ + filename_tex_portrait.puts para,"\n" + filename_tex_landscape.puts para,"\n" + end + when Hash + if para[ps] and (para[ps][:p] and para[ps][:l]) + para[ps][:p].gsub!(/^\s+/,'') if para[ps][:p] + para[ps][:l].gsub!(/^\s+/,'') if para[ps][:l] + if para[ps][:p] !~/\A\s*\Z/ + filename_tex_portrait.puts para[ps][:p],"\n" + end + if para[ps][:l] !~/\A\s*\Z/ + filename_tex_landscape.puts para[ps][:l],"\n" + end + elsif (para[:p] and para[:l]) + para[:p].gsub!(/^\s+/,'') if para[:p] + para[:l].gsub!(/^\s+/,'') if para[:l] + if para[:p] !~/\A\s*\Z/ + filename_tex_portrait.puts para[:p],"\n" + end + if para[:l] !~/\A\s*\Z/ + filename_tex_landscape.puts para[:l],"\n" + end + else p "here #{__FILE__} #{__LINE__}" if @md.cmd.inspect =~/M/ + end end end + array=@array + filename_tex_portrait.close + filename_tex_landscape.close end - filename_tex_portrait.close - filename_tex_landscape.close - @@tex_head={} #@@tex_head={ :p => nil, :l => nil } + @@tex_head={ + 'a4'=>{:p => nil, :l => nil}, + 'a5'=>{:p => nil, :l => nil}, + 'b5'=>{:p => nil, :l => nil}, + 'letter'=>{:p => nil, :l => nil}, + 'legal'=>{:p => nil, :l => nil}, + 'book'=>{:p => nil, :l => nil} + } array=[] end end diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index ab4c7fe2..0fa439f0 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -61,215 +61,22 @@ module SiSU_TeX_Pdf @@table_pg_break_counter=1 include SiSU_Viz - class Use_TeX - attr_accessor :url,:txt,:date - def initialize(md) - @md=md - @vz=SiSU_Env::Get_init.instance.skin - @date=SiSU_Env::Info_date.new # #{@date.year} - @copymark='{\\begin{footnotesize}\\raisebox{1ex}{\\copyright}\\end{footnotesize}}' - @url_brace=SiSU_Viz::Skin.new.url_decoration - end - def skip - "\n\\vspace*{\\smallskipamount} \n" - end - def paraskip_normal - '\setlength{\parskip}{1ex plus0.5ex minus0.2ex}' - end - def paraskip_small - '\setlength{\parskip}{0.5ex plus0.2ex minus0.1ex}' - end - def skip_small - #"\\smallskip{}" - end - def skip_small_vspace - "\n\\vspace*{\\smallskipamount} \n" - end - def skip_small_footnote - #"\n\\smallskip{}\n" - end - def skip_medium - "\n\\medskip{}\n\n" - end - def skip_dummy - "\n" - end - def header - "\\lhead[ ]{ }\n" + - "\\chead[ \\fancyplain{} \\bfseries \\footnotesize \\leftmark ]{ \\fancyplain{} \\bfseries \\footnotesize \\rightmark }\n" + - "\\rhead[ ]{ }\n" - end - def footer - base_prog_txt=if @md.base_program - case @md.base_program - when /kdissert/i; " \\\\ \\href{http://freehackers.org/~tnagy/kdissert/}{Kdissert}" - else '' - end - else '' - end - "\\lfoot[\\textrm{\\thepage}]{\\tiny \\href{#{@vz.url_sisu}}{#{@vz.txt_signature}}#{base_prog_txt}}\n" + - "\\cfoot[\\href{#{@vz.url_home}}{#{@vz.url_txt}}]{\\href{#{@vz.url_home}}{#{@vz.url_txt}}}\n" + - "\\rfoot[\\tiny \\href{#{@vz.url_sisu}}{#{@vz.txt_signature}}]{\\textrm{\\thepage}}\n" - end - def site - "\\href{#{@vz.url_home}}{#{@vz.url_txt}}" - end - def sitename #owners site, eg freeculture, free.for.all, gutenberg etc. - "\\href{#{@vz.url_home}}{#{@vz.txt_home}}" - end - def owner_chapter - "Contact Details for Original Promulgating Authority" - end - #BOOK standard dimensions - 229x156 - def newpage(orientation) - case orientation - when /landscape/ # using longtable latex package -< 0, :l => 0 } + @@tex_pattern_margin_number=/\\begin\{tiny\}~\\end\{tiny\}\{\\marginpar.+?\}\}\}/ + @@tableheader={ + 'a4' => { :p => 0, :l => 0 }, + 'a5' => { :p => 0, :l => 0 }, + 'b5' => { :p => 0, :l => 0 }, + 'letter' => { :p => 0, :l => 0 }, + 'legal' => { :p => 0, :l => 0 } + } @@sys=SiSU_Env::System_call.new - @@flag_code=false @@dp=nil def initialize(md,string,string1=nil) @md,@string,@string1=md,string,string1 @@ -283,13 +90,14 @@ WOK @tx=SiSU_Env::Get_init.instance.tex @url_brace=SiSU_Viz::Skin.new.url_decoration @tex2pdf=@@tex3pdf ||=SiSU_Env::System_call.new.tex2pdf_engine + @ps=string1 if string1=~/(?:a4|letter|legal|book|a5|b5)/i end def longtable_landscape @end_table='\end{longtable}' @row_break='\\\\\\' - if @string[//u] + if @string[/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s+c(\d+);(.+?)#{Mx[:tc_p]}\\~(\d+;\w\d+;\w\d+)#{Mx[:gr_c]}/u] no_of_cols,cols_width,ocn=$1,$2,$3 - tw=case @md.papersize + tw=case @ps when /a4/i; @tx.a4.landscape.w #European default, SiSU default when /letter/i; @tx.letter.landscape.w #U.S. default when /legal/i; @tx.legal.landscape.w #U.S. alternative @@ -298,9 +106,10 @@ WOK else @tx.a4.landscape.w #default currently A4 end textwidth=(tw.to_i/2) - 24 - @@tableheader[:l]=1 if @string =~/ 0 @colW=[] @colW << '{' w.each do |x| @@ -311,39 +120,40 @@ WOK end @colW << '}' @colW=@colW.join - @start_table="<~#{ocn}>\n\\setlength{\\LTleft}{0pt}\n\\setlength{\\LTright}{\\fill}\n" + + @start_table="#{Mx[:id_o]}~#{ocn}#{Mx[:id_c]}\n\\setlength{\\LTleft}{0pt}\n\\setlength{\\LTright}{\\fill}\n" + "\\begin{tiny}\n\\begin{longtable}#@colW\n" - @string.gsub!(//u,@start_table) + @string.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s+c\d+?;.+#{Mx[:tc_p]}\\~\d+;\w\d+;\w\d+#{Mx[:gr_c]}/u,@start_table) end - if @string =~// - @string.gsub!(//," #@end_table\n\\end{tiny}") + if @string =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ + @string.gsub!(/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/," #@end_table\n\\end{tiny}") end - @string.gsub!(//,1] @string.gsub!(/\/,'') - @string.gsub!(/¡\d+?¡(.+?)(?:¡|!)/u,'\bfseries \1&') - @string.gsub!(/&>\s*$/," #@row_break \\hline\\endhead #@row_break") + @string.gsub!(/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|#{Mx[:tc_c]})/u,'\bfseries \1&') + @string.gsub!(/&\s*$/," #@row_break \\hline\\endhead #@row_break") @string="#@string \\multicolumn{#{@@number_of_cols}}{l}{\\tiny #{tablefoot}} \\\\ \\hline\n\\endfoot\n\\hline\n" if tablefoot - @@tableheader[:l],@@number_of_cols=0,0 + @@tableheader[@ps][:l]=0 + #@@tableheader[@ps][:l],@@number_of_cols=0,0 end else - if @string =~/¡\d+?¡(.+?)(?:¡|!)/u - @string.gsub!(/¡\d+?¡(.+?)(?:¡|!)/u,'\1&') - @string.gsub!(/&>\s*$/," #@row_break") + if @string =~/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|#{Mx[:tc_c]})/u + @string.gsub!(/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|!)/u,'\1&') + @string.gsub!(/&\s*$/," #@row_break") end end - @string=if ocn; "<~#{ocn}>" + @string + @string=if ocn; "#{Mx[:id_o]}~#{ocn}#{Mx[:id_c]}" + @string else @string end end def longtable_portrait @end_table='\end{longtable}' @row_break='\\\\\\' - if @string[//u] + if @string[/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s+c(\d+);(.+?)#{Mx[:tc_p]}\\~(\d+;\w\d+;\w\d+)#{Mx[:gr_c]}/u] no_of_cols,cols_width,ocn=$1,$2,$3 - tw=case @md.papersize + tw=case @ps when /a4/i; @tx.a4.portrait.w #European default, SiSU default when /letter/i; @tx.letter.portrait.w #U.S. default when /legal/i; @tx.legal.portrait.w #U.S. alternative @@ -352,9 +162,9 @@ WOK else @tx.a4.portrait.w #default currently A4 end textwidth=tw.to_i - 20 - @@tableheader[:p]=1 if @string =~/\n\\setlength{\\LTleft}{0pt}\n\\setlength{\\LTright}{\\fill}\n" + + @start_table="#{Mx[:id_o]}~#{ocn}#{Mx[:id_c]}\n\\setlength{\\LTleft}{0pt}\n\\setlength{\\LTright}{\\fill}\n" + "\\begin{tiny}\n\\begin{longtable}#@colW\n" - @string.gsub!(//u,"#@start_table") + @string.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s+c\d+?;.+#{Mx[:tc_p]}\\~\d+;\w\d+;\w\d+#{Mx[:gr_c]}/u,"#@start_table") end - if @string =~// - @string.gsub!(//," #@end_table\n\\end{tiny}") + if @string =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ + @string.gsub!(/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/," #@end_table\n\\end{tiny}") end - @string.gsub!(//,1] @string.gsub!(/\/,'') - @string.gsub!(/¡\d+?¡(.+?)(?:¡|!)/u,'\bfseries \1&') - @string.gsub!(/&>\s*$/," #@row_break \\hline\\endhead #@row_break") + @string.gsub!(/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|#{Mx[:tc_c]})/u,'\bfseries \1&') + @string.gsub!(/&\s*$/," #@row_break \\hline\\endhead #@row_break") @string="#@string \\multicolumn{#{@@number_of_cols}}{l}{\\tiny #{tablefoot}} \\\\ \\hline\n\\endfoot\n\\hline\n" if tablefoot - @@tableheader[:p],@@number_of_cols=0,0 + @@tableheader[@ps][:p]=0 + #@@tableheader[@ps][:p],@@number_of_cols=0,0 end else - if @string =~/¡\d+?¡(.+?)(?:¡|!)/u - @string.gsub!(/¡\d+?¡(.+?)(?:¡|!)/u,'\1&') - @string.gsub!(/&>\s*$/," #@row_break") + if @string =~/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|#{Mx[:tc_c]})/u + @string.gsub!(/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|#{Mx[:tc_c]})/u,'\1&') + @string.gsub!(/&\s*$/," #@row_break") end end - @string=if ocn; "<~#{ocn}>" + @string + @string=if ocn; "#{Mx[:id_o]}~#{ocn}#{Mx[:id_c]}" + @string else @string end end @@ -399,7 +210,7 @@ WOK @end_table="\\end{tabular}" @row_break='\\\\\\\\' @break_page="#@row_break\n#@row_break \n" - if @string[//u] + if @string[/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s+c(\d+);(.+?)#{Mx[:gr_c]}/u] no_of_cols,cols_width=$1,$2 @w=cols_width.split(/;\s*/) @colW=[] @@ -408,391 +219,49 @@ WOK @colW << "p{#{col_w}cm}" if col_w end @start_table="\\begin{tabular}{#@colW}\n" - @string.gsub!(//u,"#@start_table}") + @string.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}\s+c\d+?;.+#{Mx[:gr_c]}/u,"#@start_table}") end - if @string =~// - @string.gsub!(//,"#@end_table") + if @string =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ + @string.gsub!(/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/,"#@end_table") @@table_pg_break_counter=1 end - if @string =~//,1] @string.gsub!(/\/,'') end end - if @string =~/¡\d+?¡(.+?)(?:¡|!)/u - @string.gsub!(/¡\d+?¡(.+?)(?:¡|!)/u,"\\1&") - @string.gsub!(/&>\s*$/,"#@row_break") + if @string =~/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|#{Mx[:tc_p]})/u + @string.gsub!(/#{Mx[:tc_p]}\d+?#{Mx[:tc_p]}(.+?)(?:#{Mx[:tc_p]}|#{Mx[:tc_p]})/u,"\\1&") + @string.gsub!(/&\s*$/,"#@row_break") end @string end - def pdftex_special_characters_1(string) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list - #p @@utf_8.list - #@string=Iconv.conv('ISO-8859-1', 'UTF-8', @string) - word=string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ - para_array=[] - string=if word - word.each do |w| # _ - / # | : ! ^ ~ - unless string =~/^(?:0~|%+ |') unless w=~/^[1-6]~|~\{|\}~|~\[|\]~|^\^~\s|~\^|\*~\S+|~#|\{t~|<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ - w.gsub!(/&#(?:126|152);/,'<=tilde>') #126 usual - #w.gsub!(/&#(?:126|152);/,'<=tilde>') unless w=~/https?:\/\/\S+/ #126 usual - w.gsub!(/\\?\|||/,'<=pipe>') #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX - end - para_array << w - end - string=para_array.join(' ') - string=string.strip - string - else '' + def heading_major(para,lev) #\emph{ + title=@md.title + para.strip! if para + para.gsub!(/(?:\\begin\{bfseries\}|\\begin\{itshape\})(.+?)(?:\\end\{bfseries\}|\\end\{itshape\})/m,'\1') + cont_ln=para.dup + cont_ln.gsub!(@@tex_pattern_margin_number,'') + cont_ln.gsub!(/#{Mx[:lv_o]}#{lev}:\S*?#{Mx[:lv_c]}\s*/,'') + if para =~/\\[Ff]ootnote/ #and para =~/^[1-6]#{@@tilde}/ # removing footnotes from headings! + cont_ln.gsub!(/\s*\\footnote\[\d+\]\{%\n .+? \}\s*/,' ') + cont_ln.gsub!(/\s*\\Footnote[A]\{[*+]+\d*\}\{%\n .+? \}\s*/,' ') end - string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'') - string.gsub!(/.+?<-#>/,'') - string.gsub!(//,'') - string.gsub!(//,'') - #problem sequence -> - string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX - string.gsub!(/&(?:lt|#060);/,'<=lt>') # < SiSU special character also LaTeX - string.gsub!(/&(?:gt|#062);/,'<=gt>') # > SiSU special character also LaTeX - string.gsub!(/{/,'<=curlyopen>') # { SiSU special character also LaTeX - string.gsub!(/}/,'<=curlyclose>') # } SiSU special character also LaTeX - string.gsub!(/&#(?:126|152);/,'<=tilde>') # ~ SiSU special character also LaTeX - string.gsub!(/#/,'\#') # # SiSU special character also LaTeX - string.gsub!(/!/,'!') # ! SiSU not really special sisu character but done, also LaTeX - string.gsub!(/*/,'*') # * should you wish to escape astrisk e.g. describing \*{bold}* - string.gsub!(/-/,'-') # - SiSU special character also LaTeX - string.gsub!(/+/,'+') # + SiSU special character also LaTeX - string.gsub!(/,/,',') # + SiSU special character also LaTeX - string.gsub!(/&/,'<=amp>') #unless @string=~/<:code>/ # / SiSU special character also LaTeX - string.gsub!(///,'<=slash>') # / SiSU special character also LaTeX - string.gsub!(/\/,'<=backslash>') # \ SiSU special character also LaTeX - string.gsub!(/_/,'<=underscore>') # _ SiSU special character also LaTeX - string.gsub!(/|/,'|') # | SiSU not really special sisu character but done, also LaTeX - string.gsub!(/:/,':') # : SiSU not really special sisu character but done, also LaTeX - string.gsub!(/^|\^/,'<=caret>') # ^ SiSU not really special sisu character but done, also LaTeX - string.gsub!(/\#/,'<=hash>') - ##watch placement, problem sequence ^ - string.gsub!(/&atild;<\/font><\/sup>/,' ') - string.gsub!(/<:pb>/,'\newpage') - string.gsub!(/<:pn>/,'\clearpage') - string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript - string - end - def pdftex_special_characters_2(string) - string.gsub!(/œ/,'\oe ') - string.gsub!(/\$/,'\$') - string.gsub!(/\#/,'\#') - string.gsub!(/\%/,'\%') - string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes - if string !~/^\s*<:image|\}:image\s/ - string.gsub!(/_/,'\_') - end - string.gsub!(/\{/,'\{') - string.gsub!(/\}/,'\}') - string.gsub!(/ /,'~') # ~ character for hardspace - # sequence important must appear after removal of { and } - string.gsub!(/&\S+?;/,'') #hmmm - # sequence imortant place before removal of & - if string=~/<:code>/; @@flag_code=true - elsif string=~/<:code-end>/; @@flag_code=false - end - if @@flag_code; string.gsub!(/&/,'{\\\&}') - else string.gsub!(/(\s+&\s+)/,' and ') - end - string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #string.gsub!(/§ /,'\S ') - string.gsub!(/£/u,'\pounds') - string.gsub!(/&\S+?;/,' ') - string.gsub!(//,' ') - string.gsub!(/<\/a>/,' ') - string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case - string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url - string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration - unless @@flag_code - string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration positive lookahead, sequence issue with { linked }http://url cannot use \b at start - else #code-block: angle brackets special characters, note _ already escaped - string.gsub!(/\\_/,'{\UseTextSymbol{OML}{>}}') - end - string.gsub!(/<:ee>/,'') - string.gsub!(//,' ') - #proposed change, insert, but may be redundant - string.gsub!(/ \/><:i[12]>(.+?)(?:\}~||<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area - string.gsub!(/(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}') - string.gsub!(/(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}') - string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}') - string.gsub!(/(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}') - string.gsub!(/(.+?)<\/i>/,'\emph{\1}') - string.gsub!(/(.+?)<\/italic>/,'\emph{\1}') - string.gsub!(/(.+?)<\/u>/,'\uline{\1}') # ulem - string.gsub!(/(.+?)<\/cite>/,"``\\1''") # quote - string.gsub!(/(.+?)<\/ins>/,'\uline{\1}') # ulem - string.gsub!(/(.+?)<\/del>/,'\sout{\1}') # ulem - string.gsub!(/(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$") - string.gsub!(/(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$") - unless @@flag_code - string.gsub!(/"(.+?)"/,'“\1”') # quote marks / quotations open & close " need condition exclude for code - string.gsub!(/\s+"/,' “') # open " - string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1“') # open " - string.gsub!(/"(\s|\.|,|:|;)/,'”\1') # close " - string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,'”\1') # close " - string.gsub!(/"(\.|,)/,'”') # close " - string.gsub!(/\s+'/,' `') # open ' - string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`') # open ' - end - string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_ - string.gsub!(/(|<\/font>)/,'') - string.gsub!(/\s*(\S+?)<\/sup>/,'^\1') - string.gsub!(/(|<\/sup>)/,'') - string - end - def pdftex_special_characters_3(string) - string.gsub!(/])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder - string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder - #problem sequence (another kludge) -> - string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}') - string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}') - #string.gsub!(/<=lt>/,'\<') - #string.gsub!(/<=gt>/,'\>') - string.gsub!(/<=underscore>/,'\_') - string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text - string.gsub!(/<=tilde>/,'{\~~}') - string.gsub!(/<=pipe>/,'{\textbar}') - string.gsub!(/<=caret>/,'{\^{~}}') - #string.gsub!(/<=caret>/,'\^{}') - string.gsub!(/<=exclaim>/,'\Verbatim{!}') - string.gsub!(/<=hash>/,'{\#}') - #string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}') - #string.gsub!(/<=slash>/,'{\slash}') - string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005 - string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005 - #string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}') - string.gsub!(/<=slash>/,'{/}') - string.gsub!(/<=backslash>/,'{\textbackslash}') - #string.gsub!(/<=asterisk>/,'*') - #string.gsub!(/<=exclaim>/,'!') - #string.gsub!(/<=asterisk>/,'{\ast}') - #string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic - #copymark='{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} ' - string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic - string - end - def xetex_special_characters_1(string) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list - #p @@utf_8.list - #string=Iconv.conv('ISO-8859-1', 'UTF-8', @string) - word=string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ - para_array=[] - string=if word - word.each do |w| # _ - / # | : ! ^ ~ - unless string =~/^(?:0~|%+ |') unless w=~/^[1-6]~|~\{|\}~|~\[|\]~|^\^~\s|~\^|\*~\S+|~#|\{t~|<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ - w.gsub!(/&#(?:126|152);/,'<=tilde>') #126 usual - #w.gsub!(/&#(?:126|152);/,'<=tilde>') unless w=~/https?:\/\/\S+/ #126 usual - w.gsub!(/\\?\|||/,'<=pipe>') #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX - end - para_array << w - end - string=para_array.join(' ') - string=string.strip - string - else '' - end - string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'') - string.gsub!(/.+?<-#>/,'') - string.gsub!(//,'') - string.gsub!(//,'') - #problem sequence -> - string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX - string.gsub!(/&(?:lt|#060);/,'<=lt>') # < SiSU special character also LaTeX - string.gsub!(/&(?:gt|#062);/,'<=gt>') # > SiSU special character also LaTeX - string.gsub!(/{/,'<=curlyopen>') # { SiSU special character also LaTeX - string.gsub!(/}/,'<=curlyclose>') # } SiSU special character also LaTeX - string.gsub!(/&#(?:126|152);/,'<=tilde>') # ~ SiSU special character also LaTeX - string.gsub!(/#/,'\#') # # SiSU special character also LaTeX - string.gsub!(/!/,'!') # ! SiSU not really special sisu character but done, also LaTeX - string.gsub!(/*/,'*') # * should you wish to escape astrisk e.g. describing \*{bold}* - string.gsub!(/-/,'-') # - SiSU special character also LaTeX - string.gsub!(/+/,'+') # + SiSU special character also LaTeX - string.gsub!(/,/,',') # + SiSU special character also LaTeX - string.gsub!(/&/,'<=amp>') #unless @string=~/<:code>/ # / SiSU special character also LaTeX - string.gsub!(///,'<=slash>') # / SiSU special character also LaTeX - string.gsub!(/\/,'<=backslash>') # \ SiSU special character also LaTeX - string.gsub!(/_/,'<=underscore>') # _ SiSU special character also LaTeX - string.gsub!(/|/,'|') # | SiSU not really special sisu character but done, also LaTeX - string.gsub!(/:/,':') # : SiSU not really special sisu character but done, also LaTeX - string.gsub!(/^|\^/,'<=caret>') # ^ SiSU not really special sisu character but done, also LaTeX - string.gsub!(/\#/,'<=hash>') - ##watch placement, problem sequence ^ - string.gsub!(/&atild;<\/font><\/sup>/,' ') - string.gsub!(/<:pb>/,'\newpage') - string.gsub!(/<:pn>/,'\clearpage') - string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript - string - end - def xetex_special_characters_2(string) - string.gsub!(/œ/,'\oe ') - string.gsub!(/\$/,'\$') - string.gsub!(/\#/,'\#') - string.gsub!(/\%/,'\%') - string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes - if string !~/^\s*<:image|\}:image\s/ - string.gsub!(/_/,'\_') - end - string.gsub!(/\{/,'\{') - string.gsub!(/\}/,'\}') - string.gsub!(/ /,'~') # ~ character for hardspace - # sequence important must appear after removal of { and } - string.gsub!(/&\S+?;/,'') #hmmm - # sequence imortant place before removal of & - if string=~/<:code>/; @@flag_code=true - elsif string=~/<:code-end>/; @@flag_code=false - end - if @@flag_code; string.gsub!(/&/,'{\\\&}') - else string.gsub!(/(\s+&\s+)/,' and ') - end - string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #string.gsub!(/§ /,'\S ') - string.gsub!(/£/u,'\pounds') - string.gsub!(/&\S+?;/,' ') - string.gsub!(//,' ') - string.gsub!(/<\/a>/,' ') - string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case - string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url - string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration - unless @@flag_code - string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration positive lookahead, sequence issue with { linked }http://url cannot use \b at start - else #code-block: angle brackets special characters, note _ already escaped - string.gsub!(/\\_/,'{\UseTextSymbol{OML}{>}}') - end - string.gsub!(/<:ee>/,'') - string.gsub!(//,' ') - #proposed change, insert, but may be redundant - string.gsub!(/ \/><:i[12]>(.+?)(?:\}~||<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area - string.gsub!(/(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}') - string.gsub!(/(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}') - string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}') - string.gsub!(/(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}') - string.gsub!(/(.+?)<\/i>/,'\emph{\1}') - string.gsub!(/(.+?)<\/italic>/,'\emph{\1}') - string.gsub!(/(.+?)<\/u>/,'\uline{\1}') # ulem - string.gsub!(/(.+?)<\/cite>/,"``\\1''") # quote - string.gsub!(/(.+?)<\/ins>/,'\uline{\1}') # ulem - string.gsub!(/(.+?)<\/del>/,'\sout{\1}') # ulem - string.gsub!(/(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$") - string.gsub!(/(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$") - unless @@flag_code - string.gsub!(/"(.+?)"/,'“\1”') # quote marks / quotations open & close " need condition exclude for code - string.gsub!(/\s+"/,' “') # open " - string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1“') # open " - string.gsub!(/"(\s|\.|,|:|;)/,'”\1') # close " - string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,'”\1') # close " - string.gsub!(/"(\.|,)/,'”') # close " - string.gsub!(/\s+'/,' `') # open ' - string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`') # open ' - end - #string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_ - string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 ● ~~') - string.gsub!(/(|<\/font>)/,'') - string.gsub!(/\s*(\S+?)<\/sup>/,'^\1') - string.gsub!(/(|<\/sup>)/,'') - string - end - def xetex_special_characters_3(string) - string.gsub!(/])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder - string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder - #problem sequence (another kludge) -> - string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}') - string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}') - #string.gsub!(/<=lt>/,'\<') - #string.gsub!(/<=gt>/,'\>') - string.gsub!(/<=underscore>/,'\_') - string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text - string.gsub!(/<=tilde>/,'{\~~}') - string.gsub!(/<=pipe>/,'{\textbar}') - string.gsub!(/<=caret>/,'{\^{~}}') - #string.gsub!(/<=caret>/,'\^{}') - string.gsub!(/<=exclaim>/,'\Verbatim{!}') - string.gsub!(/<=hash>/,'{\#}') - #string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}') - #string.gsub!(/<=slash>/,'{\slash}') - string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005 - string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005 - #string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}') - string.gsub!(/<=slash>/,'{/}') - string.gsub!(/<=backslash>/,'{\textbackslash}') - #string.gsub!(/<=asterisk>/,'*') - #string.gsub!(/<=exclaim>/,'!') - #string.gsub!(/<=asterisk>/,'{\ast}') - #string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic - #copymark='{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} ' - string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic - string - end - def special_characters_curly(string) - string.gsub!(/<=curlyopen>/,'\{') - string.gsub!(/<=curlyclose>/,'\}') - string - end - - - def special_characters_unsafe_1(string) #depreciated, make obsolete - # some substitutions are sequence sensitive, rearrange with care. - string.gsub!(/\\backslash (copyright|clearpage|newpage)/,"\\\\\\1") #kludge bad solution, find out where tail is sent through specChar ! - string - end - def special_characters #special characters - some substitutions are sequence sensitive, rearrange with care. - string=@string - case @tex2pdf - when /pdf/ - string=pdftex_special_characters_1(string) unless string.nil? - string=special_characters_unsafe_1(string) unless string.nil? #pdftex_special_characters_unsafe_1(@string) - string=pdftex_special_characters_2(string) unless string.nil? - string=pdftex_special_characters_3(string) unless string.nil? - when /xe/ - string=xetex_special_characters_1(string) unless string.nil? - string=special_characters_unsafe_1(string) unless string.nil? #xetex_special_characters_unsafe_1(@string) - string=xetex_special_characters_2(string) unless string.nil? #issues with xetex - string=xetex_special_characters_3(string) unless string.nil? - end - @string=string - end - def special_characters_safe #special characters - some substitutions are sequence sensitive, rearrange with care. - string=@string - case @tex2pdf - when /pdf/ - string=pdftex_special_characters_1(@string) unless string.nil? - string=pdftex_special_characters_2(@string) unless string.nil? - #special_characters_3(@string) - when /xe/ - string=xetex_special_characters_1(@string) unless string.nil? - string=xetex_special_characters_2(@string) unless string.nil? # remove this to start with, causes issues - end - @string=string - end - def heading_major(para,lev) - title=@md.title - para.strip! if para - para.gsub!(/(?:\\begin\{bfseries\}|\\begin\{itshape\})(.+?)(?:\\end\{bfseries\}|\\end\{itshape\})/m,'\1') - cont_ln=para.dup - cont_ln.gsub!(/#{@@tex_pattern_margin_number}/,'') - cont_ln.gsub!(/#{lev}#{@@tilde}(?:\S+)?\s+/,'') - if para =~/\\[Ff]ootnote/ #and para =~/^[1-6]#{@@tilde}/ # removing footnotes from headings! - cont_ln.gsub!(/\s*\\footnote\[\d+\]\{%\n .+? \}\s*/,' ') - cont_ln.gsub!(/\s*\\Footnote[A]\{[*+]+\d*\}\{%\n .+? \}\s*/,' ') - end - para.gsub!(/(#{@md.lev}.*)\n?$/m,"\\part*{\\1} -\\addcontentsline{toc}{section}{#{cont_ln}} -\\markboth{#{title}}\n") if (para !~/#{lev}#{@@tilde}/) - para.gsub!(/^#{lev}#{@@tilde}\s*(.*)\n?$/m, - "\\part*{\\1} -\\addcontentsline{toc}{section}{#{cont_ln}} -\\markboth{#{title}}\n") + para.gsub!(/(#{@md.lev}.*)\n?$/m, #apparently not used @md.lev does not exist + "\\part*{\\1} +\\addcontentsline{toc}{section}{#{cont_ln}} +\\markboth{#{title}}\n") if (para !~/#{Mx[:lv_o]}#{lev}:/) + para.gsub!(/^#{Mx[:lv_o]}#{lev}:\S*?#{Mx[:lv_c]}\s*(.*)\n?$/m, + "\\part*{\\1} +\\addcontentsline{toc}{section}{#{cont_ln}} +\\markboth{#{title}}\n") +para end def level1 heading_major(@string,1) @@ -807,14 +276,14 @@ WOK @string.strip! if @string @string.gsub!(/(?:\\begin\{bfseries\}|\\begin\{itshape\})(.+?)(?:\\end\{bfseries\}|\\end\{itshape\})/m,'\1') cont_ln=@string.dup - cont_ln.gsub!(/#{@@tex_pattern_margin_number}/,'') + cont_ln.gsub!(@@tex_pattern_margin_number,'') cont_ln.gsub!(/#{@@tex_backslash*2}/,"#{@@tex_backslash*4}") # added w42 - cont_ln.gsub!(/4#{@@tilde}\S+\s+/,'') + cont_ln.gsub!(/#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}\s*/,'') cont_ln.gsub!(/\\footnote\[\d+\]\{%.+?\\end\{scriptsize\}\s*\}/m,'') #arbitrary bugfix, revisit should not be necessary, eg. wta.1994 2004w22 cont_ln.gsub!(/\\Footnote[A]\{[*+]+\d*\}\{%.+?\\end\{scriptsize\}\s*\}/m,'') #arbitrary bugfix, revisit should not be necessary, eg. wta.1994 2004w22 title=@md.title @string.gsub!(/#{@md.lv4}\s+(#{@md.lv4})/m,'\1') - if @string =~/4#{@@tilde}endnotes|<:4-endnotes>/ + if @string =~/#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|<:4-endnotes>/ # watch exclusion removes endnotes marker from pdf 2003w03 @string.gsub!(/.+/m,'') end @@ -822,12 +291,12 @@ WOK cont_ln.gsub!(/\s*\\footnote\[\d+\]\{%\n .+? \}\s*/,' ') cont_ln.gsub!(/\s*\\Footnote[A]\{[*+]+\d*\}\{%\n .+? \}\s*/,' ') end - if @string !~/4#{@@tilde}/ + if @string !~/#{Mx[:lv_o]}4:/ @string.gsub!(/(#{@md.lv4}.*)\n?$/m,"\\subsubsection*{\\1} \\addcontentsline{toc}{subsection}{#{cont_ln}} \\markright{#{title}}") else - @string.gsub!(/^\s*4#{@@tilde}\S+\s*(.*)?\n?$/m,"\\subsubsection*{\\1} + @string.gsub!(/^\s*#{Mx[:lv_o]}4:\S+?#{Mx[:lv_c]}\s*(.*)?\n?$/m,"\\subsubsection*{\\1} \\addcontentsline{toc}{subsection}{#{cont_ln}} \\markright{#{title}}") end @@ -838,8 +307,8 @@ WOK @string.strip! if @string @string.gsub!(/(?:\\begin\{bfseries\}|\\begin\{itshape\})(.+?)(?:\\end\{bfseries\}|\\end\{itshape\})/m,'\1') cont_ln=@string.dup - cont_ln.gsub!(/#{@@tex_pattern_margin_number}/,'') - cont_ln.gsub!(/5#{@@tilde}\S*\s+/,'') + cont_ln.gsub!(@@tex_pattern_margin_number,'') + cont_ln.gsub!(/#{Mx[:lv_o]}5:\S*?#{Mx[:lv_c]}\s*/,'') cont_ln.gsub!(/\\footnote\[\d+\]\{%.+?\\end\{scriptsize\}\s*\}/m,'') #arbitrary bugfix, revisit should not be necessary, eg. wta.1994 2004w22 cont_ln.gsub!(/\\Footnote[A]\{[*+]+\d*\}\{%.+?\\end\{scriptsize\}\s*\}/m,'') #arbitrary bugfix, revisit should not be necessary, eg. wta.1994 2004w22 cont_ln.gsub!(/\\\&/,' and ') #revisit: tmp bugfix 200507, substitutes & with 'and' in toc, needed e.g. for AT&T, see ffa @@ -848,12 +317,13 @@ WOK cont_ln.gsub!(/\s*\\footnote\[\d+\]\{%\n .+? \}\s*/,' ') cont_ln.gsub!(/\s*\\Footnote[A]\{[*+]+\d*\}\{%\n .+? \}\s*/,' ') end - if @string !~/5#{@@tilde}/ + if @string !~/#{Mx[:lv_o]}5:/ @string.gsub!(/(#{@md.lv5}.*?)\n?$/m,"\\subsubsection*{\\1} \\addcontentsline{toc}{subsubsection}{#{cont_ln} \\\\ }") else - @string.gsub!(/^\s*5#{@@tilde}\S*\s*(.*)?\n?$/m,"\\subsubsection*{\\1} + @string.gsub!(/^\s*#{Mx[:lv_o]}5:\S*?#{Mx[:lv_c]}\s*(.*)?\n?$/m, + "\\subsubsection*{\\1} \\addcontentsline{toc}{subsubsection}{#{cont_ln} \\\\ }") end @@ -864,8 +334,8 @@ WOK @string.strip! if @string @string.gsub!(/(?:\\begin\{bfseries\}|\\begin\{itshape\})(.+?)(?:\\end\{bfseries\}|\\end\{itshape\})/m,'\1') cont_ln=@string.dup - cont_ln.gsub!(/#{@@tex_pattern_margin_number}/,'') - cont_ln.gsub!(/6#{@@tilde}\S*\s+/,'') + cont_ln.gsub!(@@tex_pattern_margin_number,'') + cont_ln.gsub!(/#{Mx[:lv_o]}6:\S*?#{Mx[:lv_c]}\s*/,'') cont_ln.gsub!(/\\footnote\[\d+\]\{%.+?\\end\{scriptsize\}\s*\}/m,'') #arbitrary bugfix, revisit should not be necessary, eg. wta.1994 2004w22 cont_ln.gsub!(/\\Footnote[A]\{[*+]+\d*\}\{%.+?\\end\{scriptsize\}\s*\}/m,'') #arbitrary bugfix, revisit should not be necessary, eg. wta.1994 2004w22 @string.gsub!(/#{@md.lv6}\s+(#{@md.lv6})/m,'\1') @@ -874,8 +344,8 @@ WOK cont_ln.gsub!(/\s*\\Footnote[A]\{[*+]+\d*\}\{%\n .+? \}\s*/,' ') end @string.gsub!(/(#{@md.lv6}.*)\n?$/m, - "\\subsubsection*{\\1}") if (@string !~/6#{@@tilde}/) - @string.gsub!(/^\s*6#{@@tilde}\S*\s*(.*)?\n?$/m, + "\\subsubsection*{\\1}") if (@string !~/#{Mx[:lv_o]}6:/) + @string.gsub!(/^\s*#{Mx[:lv_o]}6:\S*?#{Mx[:lv_c]}\s*(.*)?\n?$/m, '\subsubsection*{\1}') @string.gsub!(/#{@md.lv6}\s*(.marginpar)/m,'\1') #end BUGWATCH @@ -892,7 +362,7 @@ WOK when /8/; '70mm' when /9/; '80mm' end - @string.gsub!(/<:i#{lev}>\s*(.*)/m, + @string.gsub!(/#{Mx[:pa_o]}:i#{lev}#{Mx[:pa_c]}\s*(.*)/m, "\\begin{ParagraphIndent}{#{indent}}\\1 \\end{ParagraphIndent}}") end @@ -951,7 +421,6 @@ WOK end hsp="\n{\\color{mywhite} .}&~\n" # ~ character for hardspace caption="{\\\\\\\ \n\\begin{scriptsize}#{hsp*3}#{c}\\end{scriptsize}&}" if c - #caption="{\\\\\\\ \n\\begin{scriptsize}#{hsp*3}#{c[1]}\\end{scriptsize}&}" if c image_source=if @md.fns =~/\.(?:ssm\.)?sst$/ \ and FileTest.file?("#{dir.path.image_source_local_tex}/#{image}") dir.path.image_source_local_tex @@ -999,16 +468,16 @@ WOK z[/(\d+)x\d*/,1] else 200 end - dm=case @md.papersize - when /a4/; @tx.a4 - when /letter/; @tx.letter - when /legal/; @tx.legal - when /b5/; @tx.b5 - when /a5/; @tx.a5 - else @tx.a4 + dm=case @ps # @md.papersize + when /a4/; @tx.a4.landscape.img_px + when /letter/; @tx.letter.landscape.img_px + when /legal/; @tx.legal.landscape.img_px + when /b5/; @tx.b5.landscape.img_px + when /a5/; @tx.a5.landscape.img_px + else @tx.a4.landscape.img_px end - width=if width.to_i > dm.landscape.img_px - dm.landscape.img_px + width=if width.to_i > dm + dm else width end c=z[/``(.+?)''/m,1] @@ -1058,7 +527,54 @@ WOK end def http wm=@string.dup.scan(/\\\{.+?\\\}(?:(?:https?|file|ftp):\S+|image)|\w+\s*|./m) - @string=SiSU_TeX_Pdf::Format_text_object.new(@md,wm).http_word_mode #(orientation) + @string=SiSU_TeX_Pdf::Format_text_object.new(@md,wm,@ps).http_word_mode #GET PAPER SIZE AND USE IT + end + def title + @string=SiSU_TeX_Pdf::Special_characters.new(@md,@string).special_characters_safe + if @string1 + @string1=SiSU_TeX_Pdf::Special_characters.new(@md,@string1).special_characters_safe + @string1.gsub!(/\$/,"\\$") + "\n\\title{#@string#{@@tex_backslash*2} \\textbf{\\normalsize #@string1}\\normalsize}" + else "\n\\title{#@string}" + end + end + def title_landscape + title + end + def title_portrait + title + end + def para_num + paranumber_display=if @md.markup.inspect =~/no_ocn/ \ + or @md.mod.inspect =~/--no-ocn/ + '' + else "\\begin{tiny}~\\end{tiny}{\\marginpar{\\begin{tiny}#@string1\\end{tiny}}}" #ocn object citation numbering + end + if @string !~/^(?:#{Mx[:lv_o]}[1-6a-z-]:|#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}|<:.+?>|#{@md.lv1}|#{@md.lv2}|#{@md.lv3}|#{@md.lv4}|#{@md.lv5}|#{@md.lv6})/ + @string.gsub!(/^\s*(.+)/m,"#{paranumber_display}\\1\n") #watch - in 1-6 is suspect + else + if (@string =~/^(?:#{Mx[:lv_o]}[1-6a-z-]:|#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})/) #watch - in 1-6 is suspect + @string.gsub!(/^(#{Mx[:lv_o]}[1-6a-z-]:\S*?#{Mx[:lv_c]})\s*(.+)/m,"\\1 #{paranumber_display}\\2\n") #watch - in 1-6 is suspect + #@string.gsub!(/^(#{Mx[:lv_o]}[1-6a-z-]:\S*?#{Mx[:lv_c]})\s*(.+)/m,"\\1 #{paranumber_display} \\begin{bfseries}\\2 \\end{bfseries}\n") #watch - in 1-6 is suspect + #@string.gsub!(/^(#{Mx[:lv_o]}[1-6a-z-]:\S*?#{Mx[:lv_c]})\s*(.+)/m,"\\1 #{paranumber_display} \\emph{\\2}\n") #watch - in 1-6 is suspect + @string.gsub!(/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})\s*(.+)/m,"\\1 #{paranumber_display}\\2\n") #WHAT? + #@string.gsub!(/^(<:.+?>)\s*(.+)/m,"\\1 #{paranumber_display}\\2\n") #WHAT? + else + @string.gsub!(/((#{@md.lv1}|#{@md.lv2}|#{@md.lv3}|#{@md.lv4}|#{@md.lv5}|#{@md.lv6}).+)$/,"\\2 #{paranumber_display} \\1\n") + #@string.gsub!(/((#{@md.lv1}|#{@md.lv2}|#{@md.lv3}|#{@md.lv4}|#{@md.lv5}|#{@md.lv6}).+)$/,"\\2 #{paranumber_display} \\begin{bfseries}\\1 \\end{bfseries}\n") + #@string.gsub!(/((#{@md.lv1}|#{@md.lv2}|#{@md.lv3}|#{@md.lv4}|#{@md.lv5}|#{@md.lv6}).+)$/,"\\2 #{paranumber_display}\\emph{\\1}\n") + end + end + @string + end + end + class Format_head + def initialize(md,ps,layout=nil,string1=nil) + @md,@ps,@layout,@string1=md,ps,layout,string1 + @tx=SiSU_Env::Get_init.instance.tex + @url_brace=SiSU_Viz::Skin.new.url_decoration + @tex2pdf=@@tex3pdf ||=SiSU_Env::System_call.new.tex2pdf_engine + @ps=@string if @string=~/(?:a4|letter|legal|book|a5|b5)/i end def language @lang=if @md.dc_language[:code] @@ -1095,21 +611,6 @@ WOK else 'english' end end - def title - @string=Format_text_object.new(@md,@string).special_characters_safe - if @string1 - @string1=Format_text_object.new(@md,@string1).special_characters_safe - @string1.gsub!(/\$/,"\\$") - "\n\\title{#@string#{@@tex_backslash*2} \\textbf{\\normalsize #@string1}\\normalsize}" - else "\n\\title{#@string}" - end - end - def title_landscape - title - end - def title_portrait - title - end def tex_head_lang #babel 18n language #@md.dc_language[:name] @@ -1193,14 +694,14 @@ WOK end def tex_head_paper_dimensions d={} - case @string + case @layout when /portrait/ #textheight,textwidth=@tx.a4.portrait.h,@tx.a4.portrait.w d[:papertype],d[:fontsize]='a4paper','11pt' d[:oddsidemargin],d[:evensidemargin],d[:topmargin]='0mm','0mm','-12pt' d[:headheight],d[:headsep],d[:columnsep]='12pt','35pt','' d[:marginparsep],d[:marginparwidth]='4mm','8mm' - case @md.papersize + case @ps #@md.papersize when /a4/i #European default, SiSU default d[:papertype],d[:fontsize]='a4paper','12pt' d[:textheight],d[:textwidth]=@tx.a4.portrait.h,@tx.a4.portrait.w @@ -1231,7 +732,7 @@ WOK d[:oddsidemargin],d[:evensidemargin],d[:topmargin]='-8mm','-8mm','-18mm' d[:headheight],d[:headsep],d[:columnsep]='12pt','20pt','40pt' d[:marginparsep],d[:marginparwidth]='4mm','8mm' - case @md.papersize + case @ps #@md.papersize when /a4/i #European default, SiSU default d[:papertype],d[:fontsize]='a4paper','12pt' d[:textheight],d[:textwidth]=@tx.a4.landscape.h,@tx.a4.landscape.w @@ -1245,7 +746,7 @@ WOK d[:papertype],d[:fontsize],d[:columnsep]='b5paper','11pt','35pt' d[:textheight],d[:textwidth]=@tx.b5.landscape.h,@tx.b5.landscape.w when /a5/i - d[:papertype],d[:fontsize],d[:columnsep]='a5paper','10pt','38pt' + d[:papertype],d[:fontsize],d[:columnsep]='a5paper','10pt','32pt' d[:textheight],d[:textwidth]=@tx.a5.landscape.h,@tx.a5.landscape.w else #default currently A4 d[:papertype],d[:fontsize]='a4paper','12pt' @@ -1255,8 +756,7 @@ WOK d end def tex_head_paper - d={} - case @string + case @layout when /portrait/ tex_head_paper_portrait(tex_head_paper_dimensions) when /landscape/ @@ -1264,7 +764,7 @@ WOK end end def tex_head_pdftex_dvi - color=case @string + color=case @layout when /portrait/ <') + #if w !~/^(\s*<:image|\}:image\s)|/ + # w.gsub!(/_/,'\_') + #end + end + w.gsub!(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'<=tilde>') #126 usual + #w.gsub!(/&#(?:126|152);/,'<=tilde>') unless w=~/https?:\/\/\S+/ #126 usual + w.gsub!(/\\?\||#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'<=pipe>') #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX + end + para_array << w + end + string=para_array.join(' ') + string=string.strip + string + else '' end - if @string !~/^([1-6a-z-]#{@@tilde}\S*|<:.+?>|#{@md.lv1}|#{@md.lv2}|#{@md.lv3}|#{@md.lv4}|#{@md.lv5}|#{@md.lv6})/ - @string.gsub!(/^\s*(.+)/m,"#{paranumber_display}\\1\n") #watch - in 1-6 is suspect - else - if (@string =~/^(?:[1-6a-z-]#{@@tilde}\S*|<:.+?>)/) #watch - in 1-6 is suspect - @string.gsub!(/^([1-6a-z-]#{@@tilde}\S*)\s*(.+)/m,"\\1 #{paranumber_display} \\2\n") #watch - in 1-6 is suspect - @string.gsub!(/^(<:.+?>)\s*(.+)/m,"\\1 #{paranumber_display}\\2\n") - else - @string.gsub!(/((#{@md.lv1}|#{@md.lv2}|#{@md.lv3}|#{@md.lv4}|#{@md.lv5}|#{@md.lv6}).+)$/,"\\2 #{paranumber_display}\\1\n") + string.gsub(/\s*#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}\s*/,' ') + string.gsub!(/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') + string.gsub!(/.+?<-#>/,'') + string.gsub!(/#{Mx[:br_eof]}|#{Mx[:br_endnotes]}/,'') + #problem sequence -> + string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#nbsp#{Mx[:gl_c]}/,'<=hardspace>') # < SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<=lt>') # < SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'<=gt>') # > SiSU special character also LaTeX + #string.gsub!(/#{Mx[:gl_o]}(#[a-z]+|#[0-9]+)#{Mx[:gl_c]}/,'\1') #i don't think so + string.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'<=curlyopen>') # { SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'<=curlyclose>') # } SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'<=tilde>') # ~ SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'\#') # # SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!') # ! SiSU not really special sisu character but done, also LaTeX + string.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*') # * should you wish to escape astrisk e.g. describing \*{bold}* + string.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-') # - SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#043#{Mx[:gl_c]}/,'+') # + SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#044#{Mx[:gl_c]}/,',') # + SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#038#{Mx[:gl_c]}/,'<=amp>') #unless @string=~/<:code>/ # / SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'<=slash>') # / SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'<=backslash>') # \ SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'<=underscore>') # _ SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|') # | SiSU not really special sisu character but done, also LaTeX + string.gsub!(/#{Mx[:gl_o]}#058#{Mx[:gl_c]}/,':') # : SiSU not really special sisu character but done, also LaTeX + string.gsub!(/#{Mx[:gl_o]}#094#{Mx[:gl_c]}|\^/,'<=caret>') # ^ SiSU not really special sisu character but done, also LaTeX + string.gsub!(/\#/,'<=hash>') + ##watch placement, problem sequence ^ + string.gsub!(/&atild;<\/font><\/sup>/,' ') + string.gsub!(/#{Mx[:br_page]}/,'\newpage') + string.gsub!(/#{Mx[:br_page_new]}/,'\clearpage') + string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript + string + end + def pdftex_special_characters_2(string) + string.gsub!(/#{Mx[:gl_o]}#156#{Mx[:gl_c]}/,'\oe ') + string.gsub!(/\$/,'\$') + string.gsub!(/\#/,'\#') + string.gsub!(/\%/,'\%') + string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes + if string !~/^\s*<:image|\}:image\s/ + string.gsub!(/_/,'\_') + end + string.gsub!(/\{/,'\{') + #string.gsub!(/\}/,'\}') + string.gsub!(/ /,'~') # ~ character for hardspace + # sequence important must appear after removal of { and } + string.gsub!(/&\S+?;/,'') #hmmm + # sequence imortant place before removal of & + if string=~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/; @@flag_code=true + elsif string=~/#{Mx[:gr_o]}code-end#{Mx[:gr_c]}/; @@flag_code=false + end + if @@flag_code; string.gsub!(/&/,'{\\\&}') + else string.gsub!(/(\s+&\s+)/,' and ') + end + string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #string.gsub!(/§ /,'\S ') + string.gsub!(/£/u,'\pounds') + string.gsub!(/&\S+?;/,' ') + string.gsub!(//,' ') + string.gsub!(/<\/a>/,' ') + string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case + string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url + string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration + unless @@flag_code + string.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration positive lookahead, sequence issue with { linked }http://url cannot use \b at start + else #code-block: angle brackets special characters, note _ already escaped + string.gsub!(/\\_/,'{\UseTextSymbol{OML}{>}}') + end + string.gsub!(/<:ee>/,'') + string.gsub!(//,' ') + #proposed change, insert, but may be redundant + string.gsub!(/ \/>#{Mx[:pa_o]}:i[12]#{Mx[:pa_c]}(.+?)(?:\}~||<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area + string.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\begin{bfseries}\1 \end{bfseries}') + #string.gsub!(/(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}') + #string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\emph{\1}') + #string.gsub!(/(.+?)<\/italic>/,'\emph{\1}') + string.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\uline{\1}') # ulem + string.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,"``\\1''") # quote #CHECK + string.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\uline{\1}') # ulem + string.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\sout{\1}') # ulem + string.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,"\$^{\\textrm{\\1}}\$") + string.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,"\$_{\\textrm{\\1}}\$") + unless @@flag_code + string.gsub!(/"(.+?)"/,'“\1”') # quote marks / quotations open & close " need condition exclude for code + string.gsub!(/\s+"/,' “') # open " + string.gsub!(/^(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*"/,'\1“') # open " + string.gsub!(/"(\s|\.|,|:|;)/,'”\1') # close " + string.gsub!(/"(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*$/,'”\1') # close " + string.gsub!(/"(\.|,)/,'”') # close " + string.gsub!(/\s+'/,' `') # open ' + string.gsub!(/^(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*'/,'\1`') # open ' + end + string.gsub!(/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})?\s*#{Mx[:gl_bullet]}\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_ + string.gsub!(/(|<\/font>)/,'') + string.gsub!(/\s*#{Mx[:fa_superscript_o]}(\S+?)#{Mx[:fa_superscript_c]}/,'^\1') + #string.gsub!(/\s*(?:#{Mx[:br_line]}|#{Mx[:br_paragraph]}|\n)\*/,' \\\\ ') + #string.gsub!(/(|<\/sup>)/,'') + string + end + def pdftex_special_characters_3(string) + string.gsub!(/])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder + string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder + #problem sequence (another kludge) -> + string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}') + string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}') + #string.gsub!(/<=lt>/,'\<') + #string.gsub!(/<=gt>/,'\>') + string.gsub!(/<=underscore>/,'\_') + string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text + string.gsub!(/<=tilde>/,'{\~~}') + string.gsub!(/<=pipe>/,'{\textbar}') + string.gsub!(/<=caret>/,'{\^{~}}') + #string.gsub!(/<=caret>/,'\^{}') + string.gsub!(/<=exclaim>/,'\Verbatim{!}') + string.gsub!(/<=hash>/,'{\#}') + #string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}') + #string.gsub!(/<=slash>/,'{\slash}') + string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005 + string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005 + #string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}') + string.gsub!(/<=slash>/,'{/}') + string.gsub!(/<=backslash>/,'{\textbackslash}') + #string.gsub!(/<=asterisk>/,'*') + #string.gsub!(/<=exclaim>/,'!') + #string.gsub!(/<=asterisk>/,'{\ast}') + #string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic + #copymark='{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} ' + string.gsub!(/<=copymark>\s*(.+)?\s+(#{Mx[:id_o]}\\~\d+;\w(?:[0-6]:)?\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic + string + end + def xetex_special_characters_1(string) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list + #p @@utf_8.list + #string=Iconv.conv('ISO-8859-1', 'UTF-8', @string) + word=string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ + para_array=[] + string=if word + word.each do |w| # _ - / # | : ! ^ ~ + unless string =~/^(?:#{Rx[:meta]}|%+ |#{Mx[:gr_o]}Th?#{Mx[:tc_p]} )/um + unless w=~/^#{Mx[:lv_o]}[1-6]:|~\{|\}~|~\[|\]~|^\^~\s|~\^|\*~\S+|~#|\{t~|#{Mx[:id_o]}~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}/ + w.gsub!(/[\\]?~/,'<=tilde>') + #if w !~/^(\s*<:image|\}:image\s)/ + # w.gsub!(/_/,'\_') + #end + end + w.gsub!(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'<=tilde>') #126 usual + #w.gsub!(/&#(?:126|152);/,'<=tilde>') unless w=~/https?:\/\/\S+/ #126 usual + w.gsub!(/\\?\||#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'<=pipe>') #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX + end + para_array << w end + string=para_array.join(' ') + string=string.strip + string + else '' end - @string + string.gsub(/\s*#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}\s*/,' ') + string.gsub!(/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') + string.gsub!(/.+?<-#>/,'') + string.gsub!(/#{Mx[:br_eof]}/,'') + string.gsub!(/#{Mx[:br_endnotes]}/,'') + #string.gsub!(//,'') + #problem sequence -> + string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX + string.gsub!(/&(?:lt|#060);/,'<=lt>') # < SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#(?:gt|062)#{Mx[:gl_c]}/,'<=gt>') # > SiSU special character also LaTeX + #string.gsub!(/#{Mx[:gl_o]}(&#(?:[a-z]+|[0-9]+);)#{Mx[:gl_c]}/,'\1') + string.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'<=curlyopen>') # { SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'<=curlyclose>') # } SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/,'<=tilde>') # ~ SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'\#') # # SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!') # ! SiSU not really special sisu character but done, also LaTeX + string.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*') # * should you wish to escape astrisk e.g. describing \*{bold}* + string.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-') # - SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#043#{Mx[:gl_c]}/,'+') # + SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#044#{Mx[:gl_c]}/,',') # + SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#038#{Mx[:gl_c]}/,'<=amp>') #unless @string=~/<:code>/ # / SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'<=slash>') # / SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'<=backslash>') # \ SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'<=underscore>') # _ SiSU special character also LaTeX + string.gsub!(/#{Mx[:gl_o]}#124#{Mx[:gl_c]}/,'|') # | SiSU not really special sisu character but done, also LaTeX + string.gsub!(/#{Mx[:gl_o]}#058#{Mx[:gl_c]}/,':') # : SiSU not really special sisu character but done, also LaTeX + string.gsub!(/#{Mx[:gl_o]}#094#{Mx[:gl_c]}|\^/,'<=caret>') # ^ SiSU not really special sisu character but done, also LaTeX + string.gsub!(/\#/,'<=hash>') + ##watch placement, problem sequence ^ + string.gsub!(/&atild;<\/font><\/sup>/,' ') + string.gsub!(/#{Mx[:br_page]}/,'\newpage') + string.gsub!(/#{Mx[:br_page_new]}/,'\clearpage') + string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript + string + end + def xetex_special_characters_2(string) + string.gsub!(/#{Mx[:gl_o]}#156#{Mx[:gl_c]}/,'\oe ') + string.gsub!(/\$/,'\$') + string.gsub!(/\#/,'\#') + string.gsub!(/\%/,'\%') + string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes + if string !~/^\s*<:image|\}:image\s/ + string.gsub!(/_/,'\_') + end + string.gsub!(/\{/,'\{') + string.gsub!(/\}/,'\}') + string.gsub!(/ /,'~') # ~ character for hardspace + # sequence important must appear after removal of { and } + string.gsub!(/&\S+?;/,'') #hmmm + # sequence imortant place before removal of & + if string=~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/; @@flag_code=true + elsif string=~/#{Mx[:gr_o]}code-end#{Mx[:gr_c]}/; @@flag_code=false + end + if @@flag_code; string.gsub!(/&/,'{\\\&}') + else string.gsub!(/(\s+&\s+)/,' and ') + end + string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #string.gsub!(/§ /,'\S ') + string.gsub!(/£/u,'\pounds') + string.gsub!(/&\S+?;/,' ') + string.gsub!(//,' ') + string.gsub!(/<\/a>/,' ') + string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case + string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url + string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration + unless @@flag_code + string.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration positive lookahead, sequence issue with { linked }http://url cannot use \b at start + else #code-block: angle brackets special characters, note _ already escaped + string.gsub!(/\\_/,'{\UseTextSymbol{OML}{>}}') + end + string.gsub!(/<:ee>/,'') + string.gsub!(//,' ') + #proposed change, insert, but may be redundant + string.gsub!(/ \/>#{Mx[:pa_o]}:i[12]#{Mx[:pa_c]}(.+?)(?:\}~||<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area + string.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\begin{bfseries}\1 \end{bfseries}') + #string.gsub!(/(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}') + #string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}') + string.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\emph{\1}') + #string.gsub!(/(.+?)<\/italic>/,'\emph{\1}') + string.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\uline{\1}') # ulem + string.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,"``\\1''") # quote #CHECK + string.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\uline{\1}') # ulem + string.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\sout{\1}') # ulem + string.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,"\$^{\\textrm{\\1}}\$") + string.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,"\$_{\\textrm{\\1}}\$") + unless @@flag_code + string.gsub!(/"(.+?)"/,'“\1”') # quote marks / quotations open & close " need condition exclude for code + string.gsub!(/\s+"/,' “') # open " + string.gsub!(/^(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*"/,'\1“') # open " + string.gsub!(/"(\s|\.|,|:|;)/,'”\1') # close " + string.gsub!(/"(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*$/,'”\1') # close " + string.gsub!(/"(\.|,)/,'”') # close " + string.gsub!(/\s+'/,' `') # open ' + string.gsub!(/^(#{Mx[:lv_o]}[1-6-]:\S*?#{Mx[:lv_c]}|<.+?>)?\s*'/,'\1`') # open ' + end + string.gsub!(/^\s*#{Mx[:gl_bullet]}\s*/,'\begin{math} \bullet \end{math}~~') + string.gsub!(/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})?\s*#{Mx[:gl_bullet]}\s*/,'\1 \begin{math} \bullet \end{math}~~') + #string.gsub!(/^\s*#{Mx[:gl_bullet]}\s*/,'● ~~') + #string.gsub!(/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})?\s*#{Mx[:gl_bullet]}\s*/,'\1 ● ~~') + ##string.gsub!(/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})?\s*\\_\*\s*/,'\1 ● ~~') + ##string.gsub!(/^\\_\*\s*/,'● ~~') + string.gsub!(/(|<\/font>)/,'') + string.gsub!(/\s*#{Mx[:fa_superscript_o]}(\S+?)#{Mx[:fa_superscript_c]}/,'^\1') + #string.gsub!(/\s*(?:#{Mx[:br_line]}|#{Mx[:br_paragraph]}|\n)\*/,' \\\\ ') + #string.gsub!(/(|<\/sup>)/,'') + string + end + def xetex_special_characters_3(string) + string.gsub!(/])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder + string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder + #problem sequence (another kludge) -> + string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}') + string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}') + #string.gsub!(/<=lt>/,'\<') + #string.gsub!(/<=gt>/,'\>') + string.gsub!(/<=underscore>/,'\_') + string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text + string.gsub!(/<=tilde>/,'{\~~}') + string.gsub!(/<=pipe>/,'{\textbar}') + string.gsub!(/<=caret>/,'{\^{~}}') + #string.gsub!(/<=caret>/,'\^{}') + string.gsub!(/<=exclaim>/,'\Verbatim{!}') + string.gsub!(/<=hash>/,'{\#}') + #string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}') + #string.gsub!(/<=slash>/,'{\slash}') + string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005 + string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005 + #string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}') + string.gsub!(/<=slash>/,'{/}') + string.gsub!(/<=backslash>/,'{\textbackslash}') + #string.gsub!(/<=asterisk>/,'*') + #string.gsub!(/<=exclaim>/,'!') + #string.gsub!(/<=asterisk>/,'{\ast}') + #string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic + #copymark='{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} ' + string.gsub!(/<=copymark>\s*(.+)?\s+(#{Mx[:id_o]}\\~\d+;\w(?:[0-6]:)?\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic + string + end + def special_characters_curly(string) + string.gsub!(/<=curlyopen>/,'\{') + string.gsub!(/<=curlyclose>/,'\}') + string + end + def special_characters_unsafe_1(string) #depreciated, make obsolete + # some substitutions are sequence sensitive, rearrange with care. + string.gsub!(/\\backslash (copyright|clearpage|newpage)/,"\\\\\\1") #kludge bad solution, find out where tail is sent through specChar ! + string + end + def special_characters #special characters - some substitutions are sequence sensitive, rearrange with care. + string=@string + case @tex2pdf + when /pdf/ + string=pdftex_special_characters_1(string) unless string.nil? + string=special_characters_unsafe_1(string) unless string.nil? #pdftex_special_characters_unsafe_1(@string) + string=pdftex_special_characters_2(string) unless string.nil? + string=pdftex_special_characters_3(string) unless string.nil? + when /xe/ + string=xetex_special_characters_1(string) unless string.nil? + string=special_characters_unsafe_1(string) unless string.nil? #xetex_special_characters_unsafe_1(@string) + string=xetex_special_characters_2(string) unless string.nil? #issues with xetex + string=xetex_special_characters_3(string) unless string.nil? + end + @string=string + end + def special_characters_safe #special characters - some substitutions are sequence sensitive, rearrange with care. + string=@string + case @tex2pdf + when /pdf/ + string=pdftex_special_characters_1(@string) unless string.nil? + string=pdftex_special_characters_2(@string) unless string.nil? + #special_characters_3(@string) + when /xe/ + string=xetex_special_characters_1(@string) unless string.nil? + string=xetex_special_characters_2(@string) unless string.nil? # remove this to start with, causes issues + end + @string=string + end + end + class Use_TeX + attr_accessor :url,:txt,:date + def initialize(md) + @md=md + @vz=SiSU_Env::Get_init.instance.skin + @date=SiSU_Env::Info_date.new # #{@date.year} + @copymark='{\\begin{footnotesize}\\raisebox{1ex}{\\copyright}\\end{footnotesize}}' + @url_brace=SiSU_Viz::Skin.new.url_decoration + end + def skip + "\n\\vspace*{\\smallskipamount} \n" + end + def paraskip_normal + '\setlength{\parskip}{1ex plus0.5ex minus0.2ex}' + end + def paraskip_small + '\setlength{\parskip}{0.5ex plus0.2ex minus0.1ex}' + end + def skip_small + #"\\smallskip{}" + end + def skip_small_vspace + "\n\\vspace*{\\smallskipamount} \n" + end + def skip_small_footnote + #"\n\\smallskip{}\n" + end + def skip_medium + "\n\\medskip{}\n\n" + end + def skip_dummy + "\n" + end + def header + "\\lhead[ ]{ }\n" + + "\\chead[ \\fancyplain{} \\bfseries \\footnotesize \\leftmark ]{ \\fancyplain{} \\bfseries \\footnotesize \\rightmark }\n" + + "\\rhead[ ]{ }\n" + end + def footer + base_prog_txt=if @md.base_program + case @md.base_program + when /kdissert/i; " \\\\ \\href{http://freehackers.org/~tnagy/kdissert/}{Kdissert}" + else '' + end + else '' + end + "\\lfoot[\\textrm{\\thepage}]{\\tiny \\href{#{@vz.url_sisu}}{#{@vz.txt_signature}}#{base_prog_txt}}\n" + + "\\cfoot[\\href{#{@vz.url_home}}{#{@vz.url_txt}}]{\\href{#{@vz.url_home}}{#{@vz.url_txt}}}\n" + + "\\rfoot[\\tiny \\href{#{@vz.url_sisu}}{#{@vz.txt_signature}}]{\\textrm{\\thepage}}\n" + end + def site + "\\href{#{@vz.url_home}}{#{@vz.url_txt}}" + end + def sitename #owners site, eg freeculture, free.for.all, gutenberg etc. + "\\href{#{@vz.url_home}}{#{@vz.txt_home}}" + end + def owner_chapter + "Contact Details for Original Promulgating Authority" + end + #BOOK standard dimensions - 229x156 + def newpage(orientation) + case orientation + when /landscape/ # using longtable latex package +<).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ - if /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) + if @para =~/^(\d~|<:.+?>).+?#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + if /^(([1-6])~(\S+))\s+(\S.+?)#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m.match(@para) @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,$5 - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) + elsif /^(([1-6])~)\s+(\S.+?)#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m.match(@para) @format,@lev,@text,@ocn=$1,$2,$3,$4 - elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) + elsif /<:(.+?)>\s*(\S.+?)#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m.match(@para) @format,@text,@ocn=$1,$2,$3 - elsif /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#@dp:#@dp>$/m.match(@para) + elsif /^(([1-6])~(\S+))\s+(\S.+?)#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;[um]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m.match(@para) @@alt_id_count+=1 @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,"x#{@@alt_id_count}" - elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/m.match(@para) + elsif /^(([1-6])~)\s+(\S.+?)#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m.match(@para) @@alt_id_count+=1 @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}" end else - if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) + if /(.+?)#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m.match(@para) @text,@ocn=$1,$2 end - if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06 + if @para !~/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$|^$/ #added 2002w06 @text=/(.+?)/m.match(@para)[1] end if /^((\d)~(?:~\S+)?)\s+(.+)/m.match(@para) @@ -136,10 +136,10 @@ module SiSU_Wikispeak end end format=@format.dup - @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + @lev_para_ocn=if @para =~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{#Mx[:id_c]}$/ Format::Format_text_object.new(format,@text,@ocn) else - Format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>") + Format::Format_text_object.new(format,@text,"#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}") end self end @@ -154,7 +154,7 @@ module SiSU_Wikispeak @data,@md=data,md @vz=SiSU_Env::Get_init.instance.skin @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ #m # 2004w18 pb pn removal added + @regx=/^(?:(?:#{Mx[:br_line]}\s*|#{Mx[:br_nl]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]}\s*)?(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #m # 2004w18 pb pn removal added @tab="\t" @@dostype='unix footnotes' @br="\n" @@ -168,7 +168,7 @@ module SiSU_Wikispeak end # Used for extraction of endnotes from paragraphs def extract_endnotes(para='') - notes=para.scan(/~[{\[]([\d*+]+\s+.+?)\s*<#@dp>[}\]]~/) + notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) @n=[] end def wiki_metadata(meta) @@ -220,9 +220,9 @@ WOK n3=lv + 2 lv=nil if lv == 0 extract_endnotes(para) - para.gsub!(/~[{\[](?:[\d*+]+)\s+(.+?)<#@dp>[}\]]~/,'\1') # endnote marker marked up - para.gsub!(/^\d~\S*\s+/,'') # endnote marker marked up - para.gsub!(/<\S+?><#@dp:#@dp>/,'') # endnote marker marked up + para.gsub!(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(?:[\d*+]+)\s+(.+?)#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/,'\1') # endnote marker marked up + para.gsub!(/^#{Rx[:lv]}\S*\s+/,'') # endnote marker marked up + para.gsub!(/<\S+?>#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') # endnote marker marked up if lv @@wiki[:body] << case lv when 1; '='*2 << para.strip << @br*2 @@ -243,7 +243,7 @@ WOK table_message='[table omitted, see other document formats]' fix=[] data.each do |para| - para.gsub!(//,'') # remove dummy headings (used by html) #check para.gsub!(/_\*\s+/,'* ') # bullet markup, marked down para.gsub!(/©/,'©') # bullet markup, marked down @@ -255,9 +255,9 @@ WOK para.gsub!(/(.+?)<\/u>/,'_\1_') para.gsub!(/\{(.+?)\}(https?:\/\/\S+)/,'[\2 \1]') para.gsub!(/(https?:\/\/\S+)/,'[\1]') - para.gsub!(/<:(?:group|verse|alt|code)(?:-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,'') + para.gsub!(/<:(?:group|verse|alt|code)(?:-end)?>(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,'') para.gsub!(/<:p[bn]>/,'') # remove page breaks - para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'') # remove empty lines - check + para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') # remove empty lines - check para.gsub!(/(.+?)<\/a>/m,'\1') para.gsub!(/<:name#\S+?>/,'') # remove name links para.gsub!(/ /,' ') # decide on @@ -270,14 +270,14 @@ WOK if d_meta; wiki_metadata(d_meta) end end - if para !~/(^0~||)/ + if para !~/(^0~|#{Mx[:br_endnotes]}|#{Mx[:br_eof]})/ if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change paranum=para[@regx,3] @p_num=Format::Paragraph_number.new(paranum) end @sto=Split_text_object.new(para).lev_segname_para_ocn ### problem in scroll, it appears tables are getting paragraph numbers - m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m \ and para=~/\S+/ para=case @sto.format @@ -325,17 +325,17 @@ WOK elsif para =~/#{table_message}/ @@wiki[:body] << para << @br elsif para =~/(Note|Endnotes?)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ elsif para =~/(MetaData)/ \ - and para =~/<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info ####suspect visit + and para =~/#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info ####suspect visit #formatMono=MonoSiSU.new('
MetaData') #para=formatMono.bold_para elsif para.include? 'Owner Details' \ - and para !~/<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #formatMono=MonoSiSU.new('
Owner Details') #@@wiki[:owner_details]=formatMono.bold_para #para='' - elsif para =~/(¡|(.*)/ one,two=$1,$2 format_text=Format_text_object.new(one,two) diff --git a/lib/sisu/v0/xhtml.rb b/lib/sisu/v0/xhtml.rb index 8503bddb..067ed8a8 100644 --- a/lib/sisu/v0/xhtml.rb +++ b/lib/sisu/v0/xhtml.rb @@ -111,16 +111,16 @@ module SiSU_XHTML end end class Scroll - @@xml={ :body=>[],:sisu=>[],:open=>[],:close=>[],:head=>[] } require "#{SiSU_lib}/shared_txt" require "#{SiSU_lib}/css" include SiSU_text_utils @@dp=nil + @@xml={ :body=>[],:sisu=>[],:open=>[],:close=>[],:head=>[] } def initialize(particulars) @env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array @vz=SiSU_Env::Get_init.instance.skin @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - @regx=/^(?:(?:<:p[nb]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*)#{Mx[:lv_c]}\s*)?(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ @tab="\t" @trans=SiSU_XML_munge::Trans.new(@md) @sys=SiSU_Env::System_call.new @@ -132,16 +132,16 @@ module SiSU_XHTML publish end protected - def embedded_endntoes(para='') - para.gsub!(/~\{(\d+)\s+(.+?)\s*<#@dp>\}~/, + def embedded_endnotes(para='') + para.gsub!(/#{Mx[:en_a_o]}(\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/, '\1\2 ') - para.gsub!(/~\[([*+]\d+)\s+(.+?)\s*<#@dp>\]~/, + para.gsub!(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/, '\1\2 ') - para.gsub!(/~\{([*+]+)\s+(.+?)\s*<#@dp>\}~/, + para.gsub!(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/, '\1\2 ') end def extract_endnotes(para='') - notes=para.scan(/~[{\[]([\d*+]+\s+.+?)\s*<#@dp>[}\]]~/) + notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) notes.flatten.each do |e| s=e.to_s util=SiSU_text_utils::Wrap.new(s,70) @@ -176,7 +176,7 @@ WOK end def xml_head(meta) txt=meta.text - txt.gsub!(//,'') + txt.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,' ') txt.gsub!(/ & /,' and ') el=meta.el.gsub(/\./,'_') el_txt=meta.el.gsub(/\./,' ') @@ -197,7 +197,8 @@ WOK n3=lv + 2 lv=nil if lv == 0 extract_endnotes(para) - para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)\s*<#@dp>[}\]]~/,'\1') #footnote/endnote clean + para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/,'\1') #footnote/endnote clean + para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/,'\1') #footnote/endnote clean if para[@regx] paragraph="#{para[@regx,2]}" util=SiSU_text_utils::Wrap.new(paragraph,70) @@ -218,9 +219,11 @@ WOK @endnotes=[] end def group_structure(para='',ocn='') - para.gsub!(/<:group(?:-end)?>/,'') + para.gsub!(/#{Mx[:gr_o]}group(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) para.strip! + para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/,'\1') #footnote/endnote clean + para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/,'\1') #footnote/endnote clean @@xml[:body] << %{#{@tab*0}} << "\n" @@xml[:body] << %{#{@tab*1}#{ocn}} << "\n" @@xml[:body] << %{#{@tab*1}#{@tab*1}\n} @@ -229,7 +232,7 @@ WOK @@xml[:body] << "#{@tab*0}" << "\n" end def poem_structure(para='',ocn='') - para.gsub!(/<:verse(?:-end)?>/,'') + para.gsub!(/#{Mx[:gr_o]}verse(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) para.strip! @@xml[:body] << %{#{@tab*0}} << "\n" @@ -240,7 +243,7 @@ WOK @@xml[:body] << "#{@tab*0}" << "\n" end def code_structure(para='',ocn='') - para.gsub!(/<:code(?:-end)?>/,'') + para.gsub!(/#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) para.gsub!(/\s\s/,'  ') para.strip! @@ -266,67 +269,67 @@ WOK data.each do |para| para=@trans.markup(para) @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 - if para =~/^0~(\S+)\s+(.+?)$/ # for headers + if para =~/^#{Rx[:meta]}\s*.+?$/ # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; xml_head(d_meta) end end if @rcdc==false \ and (para =~/~metadata/ \ - or para =~/1~meta\s+Document Information/) + or para =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_c]}\s*Document Information/) @rcdc=true end - if para !~/(^0~||)/ - if para =~/.+?<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ + if para =~/.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ paranum=para[@regx,3] @p_num=SiSU_XML_format::Paragraph_number.new(@md,paranum) end - @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para_ocn + @sto=SiSU_text_parts::Split_text_object.new(@md,para).xml ### problem in scroll, it appears tables are getting paragraph numbers unless @rcdc - m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m - format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/ + format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[1-9]|null/ case @sto.format - when /^(1)~(?:(\S+))?/ + when /^(1):(\S*)/ xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body1 - when /^(2)~(?:(\S+))?/ + when /^(2):(\S*)/ xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body2 - when /^(3)~(?:(\S+))?/ + when /^(3):(\S*)/ xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body3 - when /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object + when /^(4):(\S+)/ # work on see SiSU_text_parts::Split_text_object xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body4 - when /^(5)~(?:(\S+))?/ + when /^(5):(\S*)/ xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body5 - when /^(6)~(?:(\S+))?/ + when /^(6):(\S*)/ xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body6 else - matched=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/mi.match(para) + matched=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/mi.match(para) stamp,ocn=matched[0],matched[1] - if para =~ /<:verse>/ + if para =~ /#{Mx[:gr_o]}verse#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') poem_structure(para,ocn) - elsif para =~ /<:group>/ + elsif para =~ /#{Mx[:gr_o]}group#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') group_structure(para,ocn) - elsif para =~ /<:code>/ + elsif para =~ /#{Mx[:gr_o]}code#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') code_structure(para,ocn) - elsif para =~/ )?_\*/ + elsif para =~ /^\s*(?:#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]})?#{Mx[:gl_bullet]}/ m=$1 - para.gsub!(/^(\s*(?:<:i[1-9]> )?)_\*/,'\1') + para.gsub!(/^(\s*(?:#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]})?)#{Mx[:gl_bullet]}/,'\1') xml_structure(para,nil,nil,nil,"indent_bullet#{m}") - elsif para =~ /<:i([1-9])>/ + elsif para =~ /^\s*#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/ xml_structure(para,nil,nil,nil,"indent#{$1}") else xml_structure(para,nil,nil,nil) end @@ -335,14 +338,14 @@ WOK #@@xml[:body] << "#{@tab*7}#{para[@regx,2]}\n" if para[@regx,2] # main text, contents, body KEEP #@@xml[:body] << "#{@tab*6}" << "\n" if para[@regx] end - elsif para =~/(Note|Endnotes?)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + elsif para =~/(#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ \ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ elsif para =~/(MetaData)/ \ - and para =~/<~(\d+);[m]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info + and para =~/#{Mx[:id_o]}~\d+;[m]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info format_scroll=Format_scroll.new(@md,'
MetaData') para=format_scroll.bold_para elsif para =~/(Owner Details)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ format_scroll=Format_scroll.new(@md,'
Owner Details') @@xml[:owner_details]=format_scroll.bold_para para='' @@ -351,12 +354,12 @@ WOK format_text=Format_text_object.new(one,two) para=format_text.seg_no_paranum end - if (para =~// \ - and para =~/^(-\{{2}~\d+|)/) # -endnote + if para =~// \ + and para =~/^(-\{{2}~\d+|)/ # -endnote para='' end para=case para - when /<:i[1-9]>/ + when /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/ if para =~/.*<:#>.*$/ format_text=Format_text_object.new(para,'') format_text.scr_inden_ocn_e_no_paranum @@ -373,7 +376,7 @@ WOK end else # end - para.gsub!(/<:\S+?>|/,'') if para + para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') if para end end 6.downto(4) do |x| @@ -428,7 +431,7 @@ WOK SiSU_Env::SiSU_file.new(@md).mkdir filename_xml=SiSU_Env::SiSU_file.new(@md,@md.fn[:xhtml]).mkfile @data.each do |para| - para.gsub!(/<:\S+?>|/,'') + para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') #; para.gsub!(/<:\S+?>|/,'') para.gsub!(/^\s*\n$/,'') filename_xml.puts para unless para.empty? end diff --git a/lib/sisu/v0/xml.rb b/lib/sisu/v0/xml.rb index 23125e05..9f897266 100644 --- a/lib/sisu/v0/xml.rb +++ b/lib/sisu/v0/xml.rb @@ -118,7 +118,7 @@ module SiSU_XML_SAX @env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array @vz=SiSU_Env::Get_init.instance.skin @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ @tab="\t" @trans=SiSU_XML_munge::Trans.new(@md) @sys=SiSU_Env::System_call.new @@ -130,16 +130,16 @@ module SiSU_XML_SAX publish end protected - def embedded_endntoes(para='') - para.gsub!(/~\{(\d+)\s+(.+?)\s*<#@dp>\}~/, + def embedded_endnotes(para='') + para.gsub!(/#{Mx[:en_a_o]}(\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/, '\1\2 ') - para.gsub!(/~\[([*+]\d+)\s+(.+?)\s*<#@dp>\]~/, + para.gsub!(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/, '\1\2 ') - para.gsub!(/~\{([*+]+)\s+(.+?)\s*<#@dp>\}~/, + para.gsub!(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/, '\1\2 ') end def extract_endnotes(para='') - notes=para.scan(/~[{\[]([\d*+]+\s+.+?)\s*<#@dp>[}\]]~/) + notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) notes.flatten.each do |e| s=e.to_s util=SiSU_text_utils::Wrap.new(s,70) @@ -176,7 +176,7 @@ WOK end def xml_head(meta) txt=meta.text - txt.gsub!(//,'') + txt.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,' ') txt.gsub!(/ & /,' and ') el=meta.el.gsub(/\./,'_') el_txt=meta.el.gsub(/\./,' ') @@ -219,7 +219,8 @@ WOK n3=lv + 2 lv=nil if lv == 0 extract_endnotes(para) - para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)\s*<#@dp>[}\]]~/,'\1') #footnote/endnote clean + para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/,'\1') #footnote/endnote clean + para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/,'\1') #footnote/endnote clean if para[@regx] paragraph="#{para[@regx,2]}" util=SiSU_text_utils::Wrap.new(paragraph,70) @@ -238,9 +239,10 @@ WOK @endnotes=[] end def group_structure(para='',ocn='') - para.gsub!(/<:group(?:-end)?>/,'') + para.gsub!(/#{Mx[:gr_o]}group(?:-end)?#{Mx[:gr_c]}/,'') extract_endnotes(para) - para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)\s*<#@dp>[}\]]~/,'\1') #footnote/endnote clean + para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/,'\1') #footnote/endnote clean + para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/,'\1') #footnote/endnote clean para=@trans.markup_group(para) para.strip! @@xml[:body] << %{#{@tab*0}} << "\n" @@ -253,7 +255,7 @@ WOK @endnotes=[] end def poem_structure(para='',ocn='') - para.gsub!(/<:verse(?:-end)?>/,'') + para.gsub!(/#{Mx[:gr_o]}verse(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) #para.gsub!(/\s\s/,'  ') para.strip! @@ -265,7 +267,7 @@ WOK @@xml[:body] << "#{@tab*0}" << "\n" end def code_structure(para='',ocn='') - para.gsub!(/<:code(?:-end)?>/,'') + para.gsub!(/#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) para.gsub!(/\s\s/,'  ') para.strip! @@ -291,82 +293,79 @@ WOK data.each do |para| para=@trans.markup(para) @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 - if para =~/^0~(\S+)\s+(.+?)$/ # for headers + if para =~/^#{Rx[:meta]}\s*.+?$/ # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; xml_head(d_meta) end end if @rcdc==false \ - and (para =~/~metadata/ or para =~/1~meta\s+Document Information/) + and (para =~/~metadata/ \ + or para =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_c]}\s*Document Information/) @rcdc=true end - if para !~/(^0~||)/ - if para =~/.+?<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|#{Mx[:br_endnotes]})/ + if para =~/.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ paranum=para[@regx,3] @p_num=SiSU_XML_format::Paragraph_number.new(@md,paranum) end - @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para_ocn - ### problem in scroll, it appears tables are getting paragraph numbers + @sto=SiSU_text_parts::Split_text_object.new(@md,para).xml + #@sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para_ocn unless @rcdc - m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m - format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/ + format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[1-9]|null/ case @sto.format - when /^(1)~(?:(\S+))?/ + when /^(1):(\S*)/ xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body1 - when /^(2)~(?:(\S+))?/ + when /^(2):(\S*)/ xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body2 - when /^(3)~(?:(\S+))?/ + when /^(3):(\S*)/ xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body3 - when /^(4)~(\S+)/ # work on see Split_text_object + when /^(4):(\S+)/ # work on see Split_text_object xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body4 - when /^(5)~(?:(\S+))?/ + when /^(5):(\S*)/ xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body5 - when /^(6)~(?:(\S+))?/ + when /^(6):(\S*)/ xml_structure(para,$1,@sto.ocn,$2) para=@sto.lev_para_ocn.heading_body6 else - matched=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/mi.match(para) + matched=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/mi.match(para) stamp,ocn=matched[0],matched[1] - if para =~ /<:verse>/ + if para =~ /#{Mx[:gr_o]}verse#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') poem_structure(para,ocn) - elsif para =~ /<:group>/ + elsif para =~ /#{Mx[:gr_o]}group#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') group_structure(para,ocn) - elsif para =~ /<:code>/ + elsif para =~ /#{Mx[:gr_o]}code#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') code_structure(para,ocn) - elsif para =~/ )?_\*/ #uncomment + elsif para =~ /^\s*(?:#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]})?#{Mx[:gl_bullet]}/ #uncomment m=$1 - para.gsub!(/^(\s*(?:<:i[1-9]> )?)_\*/,'\1') + para.gsub!(/^(\s*(?:#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})?)#{Mx[:gl_bullet]}/,'\1') xml_structure(para,nil,nil,nil,"indent_bullet#{m}") - elsif para =~ /<:i([1-9])>/ + elsif para =~ /#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/ xml_structure(para,nil,nil,nil,"indent#{$1}") else xml_structure(para,nil,nil,nil) end - #@@xml[:body] << "#{@tab*6}" << "\n" if para[@regx] - #@@xml[:body] << "#{@tab*7}#{para[@regx,3]}" << "\n" if para[@regx,3] - #@@xml[:body] << "#{@tab*7}#{para[@regx,2]}\n" if para[@regx,2] # main text, contents, body KEEP - #@@xml[:body] << "#{@tab*6}" << "\n" if para[@regx] end - elsif para =~/(Note|Endnotes?)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + elsif para =~/(#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ \ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ elsif para =~/(MetaData)/ \ - and para =~/<~(\d+);[m]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info + and para =~/#{Mx[:id_o]}~\d+;[m]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info format_scroll=Format_scroll.new(@md,'
MetaData') para=format_scroll.bold_para elsif para =~/(Owner Details)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ format_scroll=Format_scroll.new(@md,'
Owner Details') @@xml[:owner_details]=format_scroll.bold_para para='' @@ -379,8 +378,8 @@ WOK and para =~/^(-\{{2}~\d+|)/ # -endnote para='' end - if para =~/.*<:#>.*$/ - para=if para =~ /<:i[1-9]>/ + if para =~/.*<:#>.*$/ #investigate removal + para=if para =~ /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/ format_text=Format_text_object.new(para,'') format_text.scr_inden_ocn_e_no_paranum end @@ -395,7 +394,7 @@ WOK end else # end - para.gsub!(/<:\S+?>|/,'') if para + para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') if para end end 6.downto(4) do |x| @@ -451,7 +450,7 @@ WOK SiSU_Env::SiSU_file.new(@md).mkdir filename_xml=SiSU_Env::SiSU_file.new(@md,@md.fn[:sax]).mkfile @data.each do |para| - para.gsub!(/<:\S+?>|/,'') + para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') #; para.gsub!(/<:\S+?>|/,'') para.gsub!(/^\s*\n$/,'') filename_xml.puts para unless para.empty? end diff --git a/lib/sisu/v0/xml_dom.rb b/lib/sisu/v0/xml_dom.rb index 008a8ed2..9bcb15ec 100644 --- a/lib/sisu/v0/xml_dom.rb +++ b/lib/sisu/v0/xml_dom.rb @@ -112,15 +112,15 @@ module SiSU_XML_DOM end end class Scroll - @@xml={ :body=>[],:open=>[],:close=>[],:head=>[],:sc=>[] } require "#{SiSU_lib}/shared_txt" include SiSU_text_utils @@dp=nil + @@xml={ :body=>[],:open=>[],:close=>[],:head=>[],:sc=>[] } def initialize(particulars) @env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array @vz=SiSU_Env::Get_init.instance.skin @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ # + @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*)#{Mx[:lv_c]}\s*)?(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ @tab="\t" @trans=SiSU_XML_munge::Trans.new(@md) @sys=SiSU_Env::System_call.new @@ -133,16 +133,16 @@ module SiSU_XML_DOM end protected def xml_markup(para='') - para.gsub!(/~\{(\d+)\s+(.+?)\s*<#@dp>\}~/, + para.gsub!(/#{Mx[:en_a_o]}(\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/, '\1\2 ') - para.gsub!(/~\[([*+]\d+)\s+(.+?)\s*<#@dp>\]~/, + para.gsub!(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/, '\1\2 ') - para.gsub!(/~\{([*+]+)\s+(.+?)\s*<#@dp>\}~/, + para.gsub!(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/, '\1\2 ') end def xml_head(meta) txt=meta.text - txt.gsub!(//,'') + txt.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,' ') txt.gsub!(/ & /,' and ') el=meta.el.gsub(/\./,'_') el_txt=meta.el.gsub(/\./,' ') @@ -301,9 +301,9 @@ WOK ((lv+1)..6).each { |x| @level[x]=false } end def group_structure(para='',ocn='') - para.gsub!(/<:group(?:-end)?>/,'') + para.gsub!(/#{Mx[:gr_o]}group(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) - para.gsub!(/~\{(\d+)\s+(.+?)\s*<#@dp>\}~/m, + para.gsub!(/#{Mx[:en_a_o]}(\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/m, '\1\2 ') para.strip! @@xml[:body] << %{#{@tab*6}} << "\n" @@ -314,7 +314,7 @@ WOK @@xml[:body] << "#{@tab*6}" << "\n" end def poem_structure(para='',ocn='') - para.gsub!(/<:verse(?:-end)?>/,'') + para.gsub!(/#{Mx[:gr_o]}verse(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) para.strip! @@xml[:body] << %{#{@tab*6}} << "\n" @@ -325,7 +325,7 @@ WOK @@xml[:body] << "#{@tab*6}" << "\n" end def code_structure(para='',ocn='') - para.gsub!(/<:code(?:-end)?>/,'') + para.gsub!(/#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) para.gsub!(/\s\s/,'  ') para.strip! @@ -344,82 +344,82 @@ WOK end def markup(data) xml_sc(@md) - @rcdc=false @level,@cont,@copen,@xml_contents_close=[],[],[],[] + @rcdc=false (0..6).each { |x| @cont[x]=@level[x]=false } (4..6).each { |x| @xml_contents_close[x]='' } data.each do |para| para=@trans.markup(para) @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 - if para =~/^0~(\S+)\s+(.+?)$/ # for headers + if para =~/^#{Rx[:meta]}\s*(.+?)$/ # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; xml_head(d_meta) end end if @rcdc==false \ and (para =~/~metadata/ \ - or para =~/1~meta\s+Document Information/) + or para =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_x]}\s*Document Information/) @rcdc=true end - if para !~/(^0~||)/ - if para =~/.+?<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ + if para =~/.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ paranum=para[@regx,3] @p_num=SiSU_XML_format::Paragraph_number.new(@md,paranum) end - @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para_ocn + @sto=SiSU_text_parts::Split_text_object.new(@md,para).xml ### problem in scroll, it appears tables are getting paragraph numbers unless @rcdc - m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m - format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/ + format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[1-9]|null/ case @sto.format - when /^(1)~(?:(\S+))?/ + when /^(1):(\S*)/ xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body1 #if para =~m - when /^(2)~(?:(\S+))?/ + when /^(2):(\S*)/ xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body2 #if para =~m - when /^(3)~(?:(\S+))?/ + when /^(3):(\S*)/ xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body3 #if para =~m - when /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object + when /^(4):(\S+)/ # work on see SiSU_text_parts::Split_text_object xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body4 #if para =~m - when /^(5)~(?:(\S+))?/ + when /^(5):(\S*)/ xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body5 #if para =~m - when /^(6)~(?:(\S+))?/ + when /^(6):(\S*)/ xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body6 #if para =~m else - matched=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/mi.match(para) + matched=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/mi.match(para) stamp,ocn=matched[0],matched[1] - if para =~ /<:verse>/ + if para =~ /#{Mx[:gr_o]}verse#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') poem_structure(para,ocn) - elsif para =~ /<:group>/ + elsif para =~ /#{Mx[:gr_o]}group#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') group_structure(para,ocn) - elsif para =~ /<:code>/ + elsif para =~ /#{Mx[:gr_o]}code#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') code_structure(para,ocn) - elsif para =~/ )?_\*/ + when /^\s*(?:#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}\s*)?#{Mx[:gl_bullet]}/ m=$1 - para.gsub!(/^(\s*(?:<:i[1-9]> )?)_\*/,'\1') + para.gsub!(/^(\s*(?:#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}\s*)?)#{Mx[:gl_bullet]}/,'\1') "indent_bullet#{m}" - when /^\s*<:i([1-9])>/; "indent#{$1}" + when /^\s*#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/; "indent#{$1}" else 'norm' end xml_markup(para) @@ -432,16 +432,16 @@ WOK @@xml[:body] << "#{@tab*6}" << "\n" if para[@regx] end end - elsif para =~/(Note|Endnotes?)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + elsif para =~/(#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ \ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #format_scroll=MonoSiSU.new('
Note') #para=format_scroll.boldPara elsif para =~/(MetaData)/ \ - and para =~/<~\d+;[m]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info + and para =~/#{Mx[:id_o]}~\d+;[m]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info format_scroll=Format_scroll.new(@md,'
MetaData') para=format_scroll.bold_para elsif para =~/(Owner Details)/ \ - and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ format_scroll=Format_scroll.new(@md,'
Owner Details') @@xml[:owner_details]=format_scroll.bold_para para='' @@ -455,8 +455,7 @@ WOK para='' end if para =~/.*<:#>.*$/ - para=case para - when /<:i[1-9]>/ + para=if para =~ /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/ format_text=Format_text_object.new(para,'') format_text.scr_inden_ocn_e_no_paranum end @@ -471,8 +470,7 @@ WOK end else # end - para.gsub!(/<:\S+?>/,'') - para.gsub!(//,' ') ## Clean Prepared Text + para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') if para end end @content_flag=true @@ -535,8 +533,7 @@ WOK filename_xml=SiSU_Env::SiSU_file.new(@md,@md.fn[:dom]).mkfile @data.each do |para| #para.strip! - para.gsub!(/<:\S+?>/,'') - para.gsub!(//,'') + para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') #; para.gsub!(/<:\S+?>|/,'') para="#{para}\n" unless para.empty? filename_xml.puts para end diff --git a/lib/sisu/v0/xml_fictionbook.rb b/lib/sisu/v0/xml_fictionbook.rb index 9e9a42f9..ce100f3b 100644 --- a/lib/sisu/v0/xml_fictionbook.rb +++ b/lib/sisu/v0/xml_fictionbook.rb @@ -177,8 +177,8 @@ WOK end def markup(para,type='') para.strip! - para=para.gsub(/^[1-9]~(?:\S+)?\s+/,'') - para=para.gsub(/<~(\d+);(?:\d:\d+|\S\d+);\S\d+><#@dp:#@dp>/,'[(\1)]') + para=para.gsub(/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}\s*/,'') + para=para.gsub(/#{Mx[:id_o]}~(\d+);(?:\d:\d+|\S\d+);\S\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'[(\1)]') para=@trans.markup_fictionbook(para) para=if type.empty?; "

#{para}

" else "<#{type}>

#{para}

" @@ -288,7 +288,7 @@ WOK @ds=[] c=0 data.each do |para| - rgx_headers=/<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)>/ + rgx_headers=/#{Mx[:id_o]}~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)#{Mx[:id_c]}/ if para =~rgx_headers x=(rgx_headers).match(para) if x[3] =~/^[hum]\d+/ diff --git a/lib/sisu/v0/xml_format.rb b/lib/sisu/v0/xml_format.rb index 5558bd80..20eb6736 100644 --- a/lib/sisu/v0/xml_format.rb +++ b/lib/sisu/v0/xml_format.rb @@ -292,7 +292,7 @@ WOK @paranum=/(\d+)/m.match(txt[2])[1] @headname='' #if txt[2]=~/\d+/ - m=/\d~(\S+)/m.match(format) + m=/#{Mx[:lv_o]}\d:(\S+?)#{Mx[:lv_c]}/m.match(format) headname=m[1] if m @headname=%{} unless headname.nil? @p_num=SiSU_XML_format::Paragraph_number.new(@md,@paranum) @@ -301,7 +301,7 @@ WOK rgx=/^[1-6-]~{1,2}/ #watch link=txt[0].gsub(rgx,'') if @one =~rgx @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - rgx=/~\{\d+\s+(.+?)<#@dp>\}~/ + rgx=/~\{\d+\s+(.+?)#{Mx[:id_o]}#@dp#{Mx[:id_c]}\}~/ link=txt[0].gsub(rgx,'\1') if txt[0] =~rgx @link,@linkname=link,txt[1] @vz=SiSU_Env::Get_init.instance.skin diff --git a/lib/sisu/v0/xml_scaffold.rb b/lib/sisu/v0/xml_scaffold.rb index 1404567c..9c3a7128 100644 --- a/lib/sisu/v0/xml_scaffold.rb +++ b/lib/sisu/v0/xml_scaffold.rb @@ -185,8 +185,8 @@ module SiSU_XML_scaffold @ds=[] c=0 data.each do |para| - if para =~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+>/ - x=(/<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)>/).match(para) + if para =~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}/ + x=(/#{Mx[:id_o]}~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)#{Mx[:id_c]}/).match(para) if x[3] =~/^[hu]\d+/ @ds[c]={} @ds[c][:ocn]=x[1] diff --git a/lib/sisu/v0/xml_tables.rb b/lib/sisu/v0/xml_tables.rb index 1e08f3eb..98eab515 100644 --- a/lib/sisu/v0/xml_tables.rb +++ b/lib/sisu/v0/xml_tables.rb @@ -88,43 +88,43 @@ module SiSU_Tables m=@parablock[//,1] @@tablefoot << m if m @parablock.gsub!(//,'') - @@tablehead=1 if @parablock =~//u, + @@tablehead=1 if @parablock =~/#{Mx[:gr_o]}Th#{Mx[:tc_p]}/u + if @parablock =~/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}/u + @parablock.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+?#{Mx[:tc_p]}~(\d+);\w\d+;\w\d+#{Mx[:gr_c]}/u, %{\n#@ocn
} + %{#{para_table}}) - parablock.gsub!(/¡¡(\d+?)¡/u, + parablock.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}(\d+?)#{Mx[:tc_p]}/u, %{} + %{#{para_table}}) - parablock.gsub!(/!>/, '
} + %{#{para_table}}) - parablock.gsub!(/¡¡(\d+?)¡/u, + parablock.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}(\d+?)#{Mx[:tc_p]}/u, %{} + %{#{para_table}}) - parablock.gsub!(/!>/, '
}) end - if @parablock =~// + if @parablock =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ tablefoot=[] #bug table footers need rethink, removed for now @@tablefoot.each {|x| tablefoot << ''} @@tablefoot=[] - @parablock.gsub!(/\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>/,"
\n") # + - @parablock.gsub!(/\s*/,"
\n") + @parablock.gsub!(/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,"\n") # + + @parablock.gsub!(/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}\s*/,"\n") end if @@tablehead == 1 - if @parablock =~/¡¡/u - @parablock.gsub!(/ #{@vz.paragraph_table_xml}}) - @parablock.gsub!(/¡¡(\d+?)¡/u, + @parablock.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}(\d+?)#{Mx[:tc_p]}/u, %{ #{@vz.paragraph_table_xml}}) - @parablock.gsub!(/!>/,"\n\n") + @parablock.gsub!(/#{Mx[:tc_c]}/,"
\n\n") @@tablehead=0 end @parablock else - @parablock.gsub!(/ #{@vz.paragraph_table_xml}}) - @parablock.gsub!(/¡¡(\d+?)¡/u, + @parablock.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}(\d+?)#{Mx[:tc_p]}/u, %{ #{@vz.paragraph_table_xml}}) - @parablock.gsub!(/!>/,"\n\n\n") + @parablock.gsub!(/#{Mx[:tc_c]}/,"\n\n\n") end @parablock end @@ -171,39 +171,39 @@ module SiSU_Tables m=@parablock[//,1] @@tablefoot << m if m @parablock.gsub!(//,'') - @@tablehead=1 if @parablock =~//u; @parablock=table_head($1) + @@tablehead=1 if @parablock =~/#{Mx[:gr_o]}Th#{Mx[:tc_p]}/u + if @parablock =~/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+?#{Mx[:tc_p]}~(\d+);\w\d+;\w\d+#{Mx[:gr_c]}/u; @parablock=table_head($1) end - if @parablock =~// + if @parablock =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ tablefoot=[] @@tablefoot.each {|x| tablefoot << ''} @@tablefoot=[] - if @parablock =~//; @parablock=table_end + if @parablock =~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/; @parablock=table_end end end if @@tablehead == 1 - if @parablock =~/¡¡/u - if @parablock =~// - @parablock.gsub!(/!>/,table_row_close(true)) + if @parablock =~/#{Mx[:tc_c]}/ + @parablock.gsub!(/#{Mx[:tc_c]}/,table_row_close(true)) end @@tablehead=0 end @parablock else - if @parablock =~// - @parablock.gsub!(/!>/,table_row_close) + if @parablock =~/#{Mx[:tc_c]}/ + @parablock.gsub!(/#{Mx[:tc_c]}/,table_row_close) end @parablock end @@ -211,4 +211,5 @@ module SiSU_Tables end end end +__END__ -- cgit v1.2.3