From ff4ceb260ae3931072810a0ada124841d3b8e032 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Thu, 16 Dec 2010 23:19:35 -0500 Subject: document objects: page break; object separator (introduced & used, adjust later) * object separator introduced (requested Cory Doctorow, implementation not discussed) * dal_syntax, object separator syntax (<:---> or <:ols>) * constants, object separator * vim syntax highlighting, match object separator * document objects: page break; object separator (introduced & used) * dal, page break and object separator objects * represent page break and object separator: plaintext, html, epub, odf, texpdf (adjust later) --- lib/sisu/v2/constants.rb | 1 + lib/sisu/v2/dal_doc_str.rb | 8 +++++--- lib/sisu/v2/dal_syntax.rb | 1 - lib/sisu/v2/epub_format.rb | 5 +++++ lib/sisu/v2/epub_segments.rb | 3 ++- lib/sisu/v2/html_format.rb | 7 ++++++- lib/sisu/v2/html_scroll.rb | 2 ++ lib/sisu/v2/html_segments.rb | 3 ++- lib/sisu/v2/odf.rb | 19 +++++++++++++++---- lib/sisu/v2/odf_format.rb | 14 ++++++++++++++ lib/sisu/v2/plaintext.rb | 12 ++++++++++-- lib/sisu/v2/texpdf.rb | 6 ++++-- 12 files changed, 66 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/constants.rb b/lib/sisu/v2/constants.rb index 9367e9cd..6c949d53 100644 --- a/lib/sisu/v2/constants.rb +++ b/lib/sisu/v2/constants.rb @@ -93,6 +93,7 @@ Mx[:nbsp]= '░' #'▭ ' Mx[:br_line]= '▌' #lB ▌ 9612 #'┘' #'¶' Mx[:br_paragraph]= '█' #FB █ 9608 # PP ∥ 8741 #'▐' #'┘' #'¶' #FB █ 9608 lB ▌ 9612 RB ▐ 9616 Mx[:br_nl]= '』' # '┘' +Mx[:obj_ln_sep]= 'obj_ln_sep'; Hx[:obj_ln_sep]= {:obj=>Mx[:obj_ln_sep]} # line sep Mx[:br_page]= 'break_page'; Hx[:br_page]= {:obj=>Mx[:br_page]} # newpage Mx[:br_page_new]= 'break_page_new'; Hx[:br_page_new]= {:obj=>Mx[:br_page_new]} # clearpage Mx[:br_endnotes]= "#{Mx[:mk_o]}ENDNOTES#{Mx[:mk_c]}" diff --git a/lib/sisu/v2/dal_doc_str.rb b/lib/sisu/v2/dal_doc_str.rb index f1282df2..bca3cf7d 100644 --- a/lib/sisu/v2/dal_doc_str.rb +++ b/lib/sisu/v2/dal_doc_str.rb @@ -182,11 +182,13 @@ module SiSU_document_structure_extract SiSU_document_structure::Object_para.new.paragraph(h) else nil end - when /^(?:?)\s*$/ - if t_o =~/^(?:?)\s*$/ + when /^?\s*$/ + if t_o =~/^?\s*$/ SiSU_document_structure::Object_layout.new.break(Hx[:br_page_new]) else SiSU_document_structure::Object_layout.new.break(Hx[:br_page]) end + when /^?\s*$/ + SiSU_document_structure::Object_layout.new.break(Hx[:obj_ln_sep]) else #paragraph image=image_test(t_o) note=endnote_test?(t_o) @@ -784,7 +786,7 @@ module SiSU_document_structure_extract @o_array=[] node=ocn=ocn_dv=ocn_sp=ocnh=ocnh1=ocnh2=ocnh3=ocnh4=ocnh5=ocnh6=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnm=ocnu=ocnk=nm=0 # h heading, o other, t table, g group, i image node_count_flag=false - regex_exclude_ocn_and_node = /#{Rx[:meta]}|^@\S+?:\s|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|
|^<:\#|<:- |<[:!]!4|
< ]+?)([,.;'"]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, %{\\1#{Mx[:url_o]}\\2#{Mx[:url_c]}\\3}) end - dob.obj.gsub!(/<:?p([nb])>/,"#{Mx[:fa_o]}p\\1#{Mx[:fa_c]}") #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') dob=fontface(dob) dob.obj.gsub!(/<[:e]\s+(.+?)!?>/, "#{Mx[:en_a_o]}\\1#{Mx[:en_a_c]}") #not tested diff --git a/lib/sisu/v2/epub_format.rb b/lib/sisu/v2/epub_format.rb index e016b011..5b452f83 100644 --- a/lib/sisu/v2/epub_format.rb +++ b/lib/sisu/v2/epub_format.rb @@ -1744,6 +1744,11 @@ WOK end para_form_css('p','norm') end + def break + @txt.gsub!(/#{Mx[:br_page_new]}|#{Mx[:br_page]}/,'

') + @txt.gsub!(/#{Mx[:obj_ln_sep]}/,'

') + para_form_css('p','norm') + end def format(tag,attrib) para_form_css(tag,attrib) end diff --git a/lib/sisu/v2/epub_segments.rb b/lib/sisu/v2/epub_segments.rb index 4edcc93b..a09aebe3 100644 --- a/lib/sisu/v2/epub_segments.rb +++ b/lib/sisu/v2/epub_segments.rb @@ -357,7 +357,6 @@ WOK end def markup(dob) @debug=[] - dob.obj.gsub!(/(?:\s*#{Mx[:br_page]}\s*|\s*#{Mx[:br_page_new]}\s*)+/m,'') format_head_seg=SiSU_EPUB_Format::Head_seg.new(@md) if dob.is =~/(?:heading|para)/ #extend as necessary FIX @p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,dob.ocn) @@ -392,6 +391,8 @@ WOK sto.code elsif dob.is=='table' sto.table + elsif dob.is=='break' + sto.break end if @md.flag_separate_endnotes # may need to revisit, check dob.obj.gsub!(/"\s+href="#note_ref(\d+)">/,%{" href=\"endnotes#{Sfx[:epub_xhtml]}#note_ref\\1">}) #endnote- twice #removed file type diff --git a/lib/sisu/v2/html_format.rb b/lib/sisu/v2/html_format.rb index 8074a7ee..a59f82ac 100644 --- a/lib/sisu/v2/html_format.rb +++ b/lib/sisu/v2/html_format.rb @@ -299,7 +299,7 @@ WOK #{@vz.banner_band} - #{doc_types} + #{doc_types}  #{firstseg}  @@ -1023,6 +1023,11 @@ WOK end para_form_css('p','norm') end + def break + @txt.gsub!(/#{Mx[:br_page_new]}|#{Mx[:br_page]}/,'

') + @txt.gsub!(/#{Mx[:obj_ln_sep]}/,'

') + para_form_css('p','norm') + end def format(tag,attrib) para_form_css(tag,attrib) end diff --git a/lib/sisu/v2/html_scroll.rb b/lib/sisu/v2/html_scroll.rb index 049eb922..bc6b4812 100644 --- a/lib/sisu/v2/html_scroll.rb +++ b/lib/sisu/v2/html_scroll.rb @@ -174,6 +174,8 @@ module SiSU_HTML_scroll sto.code elsif dob.is=='table' sto.table + elsif dob.is=='break' + sto.break end if dob =~// \ and dob =~/^(?:\^~\d+\s|)/ # hmmm re-adjusted 200507, for alt endnote which should again be matched ^~ ... not in response to problem though diff --git a/lib/sisu/v2/html_segments.rb b/lib/sisu/v2/html_segments.rb index a516d809..c1649a88 100644 --- a/lib/sisu/v2/html_segments.rb +++ b/lib/sisu/v2/html_segments.rb @@ -357,7 +357,6 @@ module SiSU_HTML_seg end def markup(dob) @debug=[] - dob.obj.gsub!(/(?:\s*#{Mx[:br_page]}\s*|\s*#{Mx[:br_page_new]}\s*)+/m,'') format_head_seg=SiSU_HTML_Format::Head_seg.new(@md) if dob.is !~/meta/ if dob.is =~/(?:heading|para)/ #extend as necessary FIX @@ -398,6 +397,8 @@ module SiSU_HTML_seg sto.code elsif dob.is=='table' sto.table + elsif dob.is=='break' + sto.break end if @md.flag_separate_endnotes dob.obj.gsub!(/"\s+href="#_(\d+)">/,%{" href=\"endnotes#{Sfx[:html]}#_\\1">}) #endnote- twice #removed file type diff --git a/lib/sisu/v2/odf.rb b/lib/sisu/v2/odf.rb index 769870eb..d19945c0 100644 --- a/lib/sisu/v2/odf.rb +++ b/lib/sisu/v2/odf.rb @@ -411,6 +411,18 @@ module SiSU_ODF end dob end + def obj_break(dob) + if dob.is =='break' + br=SiSU_ODF_format::Format_obj_break.new(@md,dob) + if dob.obj==Mx[:br_page] \ + or dob.obj==Mx[:br_page_new] + dob=br.br_page + elsif dob.obj==Mx[:obj_ln_sep] + dob=br.obj_sep + end + end + dob + end def odf_structure(md,dob) @md,@dob=md,dob dob=if dob.is !='code' @@ -431,6 +443,8 @@ module SiSU_ODF @@odf[:body] << code(dob).obj << @br*2 elsif dob.is=='table' #elsif dob.obj =~ /') - dob.obj.gsub!(/#{Mx[:br_page]}|#{Mx[:br_page_new]}/, - ' ') dob.obj.gsub!(/©/,'©') #too arbitrary dob.obj.gsub!(/.+?<-#>/,'') # remove dummy headings (used by html) #check dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/, @@ -517,7 +529,6 @@ module SiSU_ODF dob.obj.gsub!(/\\copy(?:right)?\b/,'©') dob.obj.gsub!(/\\trademark\b|\\tm\b/,'®') dob.obj.gsub!(/\44/,'$') #$ watch - dob.obj.gsub!(/#{Mx[:br_page]}|#{Mx[:br_page_new]}/,'') # remove page breaks dob.obj.gsub!(/(.+?)<\/a>/,'\1') dob.obj.gsub!(/#{Mx[:mk_o]}name#\S+?#{Mx[:mk_c]}/,'') # remove name links wordlist=dob.obj.scan(/\S+/) @@ -528,7 +539,7 @@ module SiSU_ODF if defined? dob.ocn and dob.ocn =~/\d+/ @p_num=SiSU_ODF_format::Paragraph_number.new(dob.ocn) end - if dob.is=~/heading|para|group|verse|code|table/ # extend, include other types + if dob.is=~/heading|para|group|verse|code|table|break/ # extend, include other types odf_structure(@md,dob) end dob.obj.gsub!(//,' ') if dob.obj ## Clean Prepared Text diff --git a/lib/sisu/v2/odf_format.rb b/lib/sisu/v2/odf_format.rb index 384b46b7..77b5bbbc 100644 --- a/lib/sisu/v2/odf_format.rb +++ b/lib/sisu/v2/odf_format.rb @@ -197,6 +197,20 @@ module SiSU_ODF_format @dob end end + class Format_obj_break + def initialize(md,t_o) + @md,@t_o=md,t_o + end + def br_page + @t_o.obj=' ' + @t_o + end + def obj_sep #center later + sep='--- ' + @t_o.obj=%{#{sep*20}} + @t_o + end + end class XML end end diff --git a/lib/sisu/v2/plaintext.rb b/lib/sisu/v2/plaintext.rb index e919af78..ba146978 100644 --- a/lib/sisu/v2/plaintext.rb +++ b/lib/sisu/v2/plaintext.rb @@ -328,13 +328,12 @@ WOK dob.obj.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _< dob.obj.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_< end - dob.obj.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'') # remove page breaks dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1') dob.obj.gsub!(/(.+?)<\/a>/m,'\1') dob.obj.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links dob.obj.gsub!(/ |#{Mx[:nbsp]}/,' ') # decide on dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") - dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") + dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,' [ \1 ]') dob.obj.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') wordlist=dob.obj.scan(/\S+/) if dob.obj !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ @@ -350,6 +349,15 @@ WOK or dob.is=='code' \ or dob.is=='table' @plaintext[:body] << dob.obj << @br + elsif dob.is=='break' + sp=' ' + ln='-' + @plaintext[:body] <<=if dob.obj==Mx[:br_page] \ + or dob.obj==Mx[:br_page_new] + "#{@br}#{ln*40}#{@br*2}" + elsif dob.obj ==Mx[:obj_ln_sep] + "#{@br}#{sp*20}* * *#{@br*2}" + end # following empty line (@br) missing, fix end dob='' if (dob.obj =~// \ and dob.obj =~/^(-\{{2}~\d+|)/) # -endnote diff --git a/lib/sisu/v2/texpdf.rb b/lib/sisu/v2/texpdf.rb index 3a868cd8..a1a2813f 100644 --- a/lib/sisu/v2/texpdf.rb +++ b/lib/sisu/v2/texpdf.rb @@ -72,7 +72,7 @@ module SiSU_TeX @@tex_pattern_margin_number=/\\\\begin\\\{tiny\\\}\\\\hspace\\\{0mm\\\}\\\\end\\\{tiny\\\}\\\{\\\\marginpar.+?\s+/ @@n=@@tableheader=@@rights=nil @@date ||=SiSU_Env::Info_date.new - class Source #Songsheet #