# coding: utf-8 =begin * Name: SiSU * Description: a framework for document structuring, publishing and search * Author: Ralph Amissah * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved. * License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . If you have Internet connection, the latest version of the GPL should be available at these locations: * SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system * Homepages: * Download: * Ralph Amissah ** Description: xml (dom style) output processing ** Notes: tidy -xml dom.xml >> index.tidy =end module SiSU_XML_DOM require "#{SiSU_lib}/defaults" include SiSU_Viz require "#{SiSU_lib}/particulars" include SiSU_Particulars require "#{SiSU_lib}/sysenv" include SiSU_Env require "#{SiSU_lib}/dal" require "#{SiSU_lib}/shared_xml" require "#{SiSU_lib}/xml_format" include SiSU_XML_format include SiSU_XML_munge require "#{SiSU_lib}/rexml" include SiSU_Rexml @@alt_id_count,@@tablehead,@@number_of_cols=0,0,0 @@tablefoot='' class Source def initialize(opt) @opt=opt @particulars=SiSU_Particulars::Combined_singleton.instance.get_all(opt) end def read begin @env,@md,@dal_array=@particulars.env,@particulars.md,@particulars.dal_array path=@env.path.output_tell loc=@env.url.output_tell tool=if @opt.cmd =~/[MV]/; 
# --- Reviewer orientation (comments only; no code changed) ---
# NOTE(review): this listing appears to have lost its original line breaks and
# the markup inside its heredocs/strings (several WOK terminators below have no
# visible heredoc opener and no tag text) -- confirm against a pristine copy
# before relying on it.
# Flow visible in this file:
#   Source#read          -> takes env/md/dal_array from the Combined_singleton
#                           particulars, builds SiSU_Screen::Ansi messages
#                           (colorize unless flag q, flow on flags M/V) and then
#                           runs Songsheet#songsheet; any exception is passed to
#                           SiSU_Errors::Info_error.
#   Songsheet#songsheet  -> runs Scroll#songsheet, then Tidy#xml when the command
#                           flags match /[vVM]/ and SiSU_Rexml::Rexml#xml on /M/.
#   Scroll#songsheet     -> pre, markup(@dal_array), post, publish.
#   Output#xml           -> writes each collected element to the file created
#                           for @md.fn[:dom].
# The value being assigned to tool here is the hint string handed to
# SiSU_Screen::Ansi: web browser plus xml viewer commands for flags M/V, web
# browser only for flag v, otherwise an empty string.
"#{@env.program.web_browser} #{loc}/#{@md.fnb}/#{@md.fn[:dom]}\n\t#{@env.program.xml_viewer} #{path}/#{@md.fnb}/#{@md.fn[:dom]}" elsif @opt.cmd =~/v/; "#{@env.program.web_browser} #{loc}/#{@md.fnb}/#{@md.fn[:dom]}" else '' end SiSU_Screen::Ansi.new(@opt.cmd,'invert','XML DOM',tool).colorize unless @opt.cmd =~/q/ SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:dom]}").flow if @opt.cmd =~/[MV]/ SiSU_XML_DOM::Source::Songsheet.new(@particulars).songsheet rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error ensure #file closed in songsheet end end private class Songsheet def initialize(particulars) @env,@md,@dal_array,@particulars=particulars.env,particulars.md,particulars.dal_array,particulars end def songsheet begin SiSU_XML_DOM::Source::Scroll.new(@particulars).songsheet SiSU_XML_DOM::Source::Tidy.new(@md,@env).xml if @md.cmd =~/[vVM]/ # test wellformedness, comment out when not in use SiSU_Rexml::Rexml.new(@md,@md.fn[:dom]).xml if @md.cmd =~/M/ # test rexml parsing, comment out when not in use #debug rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error ensure end end end class Scroll require "#{SiSU_lib}/shared_txt" include SiSU_text_utils @@dp=nil @@xml={ :body=>[],:open=>[],:close=>[],:head=>[],:sc=>[] } def initialize(particulars) @env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array @vz=SiSU_Env::Get_init.instance.skin @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*)#{Mx[:lv_c]}\s*)?(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ @tab="\t" @trans=SiSU_XML_munge::Trans.new(@md) @sys=SiSU_Env::System_call.new end def songsheet pre @data=markup(@dal_array) post publish end protected def xml_markup(para='') para.gsub!(/#{Mx[:en_a_o]}(\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/, '\1\2 ') 
# xml_markup rewrites endnote spans in para in place: each gsub! matches an
# opening endnote delimiter, a number or */+ marker, the note text and a
# trailing digest field (@dp), and keeps only the two captures followed by a
# space. gsub! returns nil when nothing matched, so this method's return value
# is not meaningful; the calls in markup use it purely for the in-place edit.
para.gsub!(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_b_c]}/, '\1\2 ') para.gsub!(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/, '\1\2 ') end def xml_head(meta) txt=meta.text txt.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,' ') txt.gsub!(/ & /,' and ') el=meta.el.gsub(/\./,'_') el_txt=meta.el.gsub(/\./,' ') @@xml[:head] <<= if meta.type == 'meta' < #{@tab*2}#{el_txt.capitalize}: #{@tab*2}<#{el}> #{@tab*3}#{txt} #{@tab*2} #{@tab} WOK else '' end end def xml_sc(md='') sc=if @md.sc_info < filename: #{@md.sc_filename}
version number: #{@md.sc_number}
version date: #{@md.sc_date}
WOK else '' end @@xml[:sc]=sc #<<< #{@tab*n2} #{@tab*n3}#{ocn}#{tag} #{@tab*n3}#{para[@regx,2]} #{@tab*n2} #{@tab*n1}#{xml_content} WOK if lv == 4 @copen[1]=true @copen[2]=@copen[3]=false elsif lv == 5 @copen[2]=true @copen[3]=false elsif lv == 6 @copen[3]=true end end def xml_structure(lv='',ocn='',para='',hname='' ) lv=lv.to_i n=lv - 1 n1=lv n2=lv + 1 n3=lv + 2 v=lv - 3 tag='' tag="\n#{@tab*n3}#{hname}\n" if hname !=nil #if para[@regx] # paragraph="#{para[@regx,2]}" # util=SiSU_text_utils::Paragraph.new(paragraph,70) # wrapped=util.line_wrap #end case lv when 1..3 xml_element="" 3.downto(lv) do |x| y=x - 1 if @cont[1] \ or @cont[2] \ or @cont[3] @@xml[:body] << "#{@tab*5}\n" end #@@xml[:body] << "#{@tab*5}\n" if @cont[1] == true or @cont[2] == true or @cont[3] == true @cont[1]=false if @cont[1] @cont[2]=false if @cont[2] @cont[3]=false if @cont[3] ####### attempt to close contents if @copen[3] # 6~ [3,2,1].each { |v| @@xml[:body] << "#{@tab*n}\n" } @copen[1]=@copen[2]=@copen[3]=false elsif @copen[2] # 5~ [2,1].each { |v| @@xml[:body] << "#{@tab*n}\n" } @copen[1]=@copen[2]=@copen[3]=false elsif @copen[1] # 4~ [1].each { |v| @@xml[:body] << "#{@tab*n}\n" } @copen[1]=@copen[2]=@copen[3]=false end @@xml[:body] << "#{@tab*y}\n" if @level[x] @level[x]=false end when 4..6 6.downto(lv) do |x| y=x - 1 if @level[x] == true u=x - 3; @xml_contents_close[x]='' end end cv=lv - 3 xml_element="" xml_content="\n#{@tab*5}" case lv when 4 @@xml[:body] << "#{@tab*5}\n" if @cont[1] if @copen[3] == true # 6~ [3,2,1].each { |v| @@xml[:body] << "#{@tab*n}\n" } elsif @copen[2] == true # 5~ [2,1].each { |v| @@xml[:body] << "#{@tab*n}\n" } elsif @copen[1] == true # 4~ [1].each { |v| @@xml[:body] << "#{@tab*n}\n" } end @cont[1]=true when 5 if @cont[2] \ or @cont[1] @@xml[:body] << "#{@tab*5}\n" end if @copen[3] == true #6~ [3,2].each { |v| @@xml[:body] << "#{@tab*n}\n" } elsif @copen[2] == true #5~ [2].each { |v| @@xml[:body] << "#{@tab*n}\n" } end @cont[2]=true when 6 if @cont[3] \ or 
@cont[2] \ or @cont[1] @@xml[:body] << "#{@tab*5}\n" end if @copen[3] #6{ [3].each { |v| @@xml[:body] << "#{@tab*n}\n" } end @cont[3]=true end end xml_element(lv,ocn,para,hname,tag,xml_element,xml_content) @level[lv]=true ((lv+1)..6).each { |x| @level[x]=false } end def group_structure(para='',ocn='') para.gsub!(/#{Mx[:gr_o]}group(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) para.gsub!(/#{Mx[:en_a_o]}(\d+)\s+(.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/m, '\1\2 ') para.strip! @@xml[:body] << %{#{@tab*6}} << "\n" @@xml[:body] << %{#{@tab*7}#{ocn}} << "\n" @@xml[:body] << %{#{@tab*7}#{@tab*1}\n} @@xml[:body] << %{#{@tab*8}#{para}#{@tab*1}\n} @@xml[:body] << %{#{@tab*7}\n} @@xml[:body] << "#{@tab*6}" << "\n" end def poem_structure(para='',ocn='') para.gsub!(/#{Mx[:gr_o]}verse(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) para.strip! @@xml[:body] << %{#{@tab*6}} << "\n" @@xml[:body] << %{#{@tab*7}#{ocn}} << "\n" @@xml[:body] << %{#{@tab*7}#{@tab*1}\n} @@xml[:body] << %{#{@tab*8}#{para}#{@tab*1}\n} @@xml[:body] << %{#{@tab*7}\n} @@xml[:body] << "#{@tab*6}" << "\n" end def code_structure(para='',ocn='') para.gsub!(/#{Mx[:gr_o]}code(?:-end)?#{Mx[:gr_c]}/,'') para=@trans.markup_group(para) para.gsub!(/\s\s/,'  ') para.strip! 
# The appends below emit the code block into @@xml[:body]: lines at tab depths
# 6 and 7 (the second carrying ocn), then para at depth 8, then closing lines
# at depths 7 and 6.
@@xml[:body] << %{#{@tab*6}} << "\n" @@xml[:body] << %{#{@tab*7}#{ocn}} << "\n" @@xml[:body] << %{#{@tab*7}#{@tab*1}\n} @@xml[:body] << %{#{@tab*8}#{para}#{@tab*1}\n} @@xml[:body] << %{#{@tab*7}\n} @@xml[:body] << "#{@tab*6}" << "\n" end def table_structure(table='',ocn='') #tables @@xml[:body] << %{#{@tab*0}} << "\n" #if para[@regx] @@xml[:body] << %{#{@tab*1}#{table}\n#{@tab*1}\n} # unless lv # main text, contents, body KEEP @@xml[:body] << "#{@tab*0}" << "\n" #if para[@regx] @endnotes=[] end def markup(data) xml_sc(@md) @level,@cont,@copen,@xml_contents_close=[],[],[],[] @rcdc=false (0..6).each { |x| @cont[x]=@level[x]=false } (4..6).each { |x| @xml_contents_close[x]='' } data.each do |para| @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 para=@trans.markup(para) if para =~/^#{Rx[:meta]}\s*(.+?)$/ # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; xml_head(d_meta) end end if @rcdc==false \ and (para =~/~metadata/ \ or para =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_c]}\s*Document Information/) @rcdc=true end if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ if para =~/.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ paranum=para[@regx,3] @p_num=SiSU_XML_format::Paragraph_number.new(@md,paranum) end @sto=SiSU_text_parts::Split_text_object.new(@md,para).xml ### problem in scroll, it appears tables are getting paragraph numbers unless @rcdc m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m txt_obj={:txt =>@sto.text} format_scroll=SiSU_XML_format::Format_scroll.new(@md,txt_obj) if @sto.format =~/i[1-9]|ordinary/ case @sto.format when /^(1):(\S*)/ xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body1 #if para =~m when /^(2):(\S*)/ xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body2 #if para =~m when /^(3):(\S*)/ xml_markup(para) 
# Heading branches 1..6 share one shape: xml_markup(para) rewrites endnotes,
# xml_structure($1, @sto.ocn, para, $2) is called with the matched level and
# name, then para is replaced by @sto.lev_para_ocn.heading_bodyN.
# NOTE(review): only the level-4 pattern requires a non-empty name (\S+ where
# the other levels use \S*) -- confirm that is intended.
xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body3 #if para =~m when /^(4):(\S+)/ # work on see SiSU_text_parts::Split_text_object xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body4 #if para =~m when /^(5):(\S*)/ xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body5 #if para =~m when /^(6):(\S*)/ xml_markup(para) xml_structure($1,@sto.ocn,para,$2) para=@sto.lev_para_ocn.heading_body6 #if para =~m else matched=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/mi.match(para) stamp,ocn=matched[0],matched[1] if para =~ /#{Mx[:gr_o]}verse#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') poem_structure(para,ocn) elsif para =~ /#{Mx[:gr_o]}group#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') group_structure(para,ocn) elsif para =~ /#{Mx[:gr_o]}code#{Mx[:gr_c]}/ para.gsub!(/#{stamp}/,'') code_structure(para,ocn) elsif para =~/#{Mx[:gr_o]}Th?.+/ # tables come as single block #work area 2005w13 table=SiSU_Tables::Table_xml.new(para,ocn) para=table.table_split @@xml[:body] << table_structure(para,ocn) else #xml_structure(para, nil, nil, nil) type=case para when /^\s*(?:#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}\s*)?#{Mx[:gl_bullet]}/ m=$1 para.gsub!(/^(\s*(?:#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}\s*)?)#{Mx[:gl_bullet]}/,'\1') "indent_bullet#{m}" when /^\s*#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/; "indent#{$1}" else 'norm' end xml_markup(para) if para[@regx] \ and para[@regx,3] @@xml[:body] << %{#{@tab*6}} << "\n" end @@xml[:body] << "#{@tab*7}#{para[@regx,3]}" << "\n" if para[@regx,3] @@xml[:body] << %{#{@tab*7}#{para[@regx,2]}\n} if para[@regx,2] # main text, contents, body KEEP @@xml[:body] << "#{@tab*6}" << "\n" if para[@regx] end end elsif para =~/(#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ \ and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #format_scroll=MonoSiSU.new('
Note') #para=format_scroll.boldPara elsif para =~/(MetaData)/ \ and para =~/#{Mx[:id_o]}~\d+;[m]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info txt_obj={:txt =>'
MetaData'} format_scroll=Format_scroll.new(@md,txt_obj) para=format_scroll.bold_para elsif para =~/(Owner Details)/ \ and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ txt_obj={:txt =>'
Owner Details'} format_scroll=Format_scroll.new(@md,txt_obj) @@xml[:owner_details]=format_scroll.bold_para para='' #elsif para =~/(.*)<:#>(.*)/ # one,two=$1,$2 # format_text=Format_text_object.new(one,two) # para=format_text.seg_no_paranum end if para =~// \ and para =~/^(-\{{2}~\d+|)/ # -endnote para='' end if para =~/.*<:#>.*$/ para=if para =~ /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/ txt_obj={:txt =>para} format_text=Format_text_object.new(@md,txt_obj) format_text.scr_inden_ocn_e_no_paranum end end if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ # i don't get the condition for no paranum end else # end para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') if para end end @content_flag=true 6.downto(4) do |x| y=x - 1; v=x - 3 if @level[x] == true #2004w36 bug fix? watch/test previous logic broke on free.for.all @coontent_flag introduced if @content_flag==true @@xml[:body] << "#{@tab*5}\n#{@tab*y}\n" @content_flag=false else @@xml[:body] << "\n#{@tab*y}\n" end end end 3.downto(1) do |x| y=x - 1 @@xml[:body] << "#{@tab*y}\n" if @level[x] == true end #6.downto(1) { |x| y=x - 1; @@xml[:body] << "#{@tab*y}\n" if @level[x] == true } end def pre rdf=SiSU_XML_tags::RDF.new(@md) dir=SiSU_Env::Info_env.new css=SiSU_Env::CSS_select.new(@md).xml_dom encoding=if @sys.locale =~/utf-?8/i; '' else '' end @@xml[:open] =< #{rdf.comment_xml} WOK @@xml[:head] << "\n" @@xml[:body] << "\n" end def post @@xml[:head] << @@xml[:sc] @@xml[:head] << "\n" @@xml[:body] << "\n" @@xml[:close] = "\n" end def publish content=[] content << @@xml[:open] << @@xml[:head] << @@xml[:body] << @@xml[:metadata] content << @@xml[:owner_details] if @md.stmp =~/\w\w/ content << @@xml[:tail] << @@xml[:close] content.flatten!.compact! 
# Hand the flattened, nil-free content to Output#xml, then reset the
# accumulators for head, body and tail.
# NOTE(review): flatten! returns nil when it changes nothing, so the chained
# compact! above relies on content always holding a nested array; also
# @@xml[:metadata] and @@xml[:owner_details] are class-level and are not
# cleared here -- check whether values can carry over between documents.
Output.new(content,@md).xml @@xml[:head],@@xml[:body],@@xml[:tail]=[],[],[] # check whether should be nil end end class Output include SiSU_Param def initialize(data,md) @data,@md=data,md end def xml SiSU_Env::SiSU_file.new(@md).mkdir filename_xml=SiSU_Env::SiSU_file.new(@md,@md.fn[:dom]).mkfile @data.each do |para| #para.strip! para.gsub!(/#{Mx[:pa_o]}:\S+#{Mx[:pa_c]}/,'') #; para.gsub!(/<:\S+?>|/,'') para="#{para}\n" unless para.empty? filename_xml.puts para end filename_xml.close end end class Tidy def initialize(md,dir) @md,@env=md,dir @prog=SiSU_Env::Info_program.new end def xml if @prog.tidy !=false if @md.cmd =~/[VM]/ tell=SiSU_Screen::Ansi.new(@md.cmd,'invert','Using XML Tidy','check document structure') tell.colorize unless @md.cmd =~/q/ tell.grey_open unless @md.cmd =~/q/ tidyfile='/dev/null' #don't want one or screen output, check for alternative flags tidy=SiSU_Env::System_call.new("#{@env.path.output}/#{@md.fnb}/#{@md.fn[:dom]}",tidyfile) tidy.well_formed? tell.p_off unless @md.cmd =~/q/ end end end end end end __END__