# encoding: utf-8
=begin

* Name: SiSU

** Description: documents, structuring, processing, publishing, search
*** simple xml representation (sax style)

** Author: Ralph Amissah
  [ralph@amissah.com]
  [ralph.amissah@gmail.com]

** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
  2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Ralph Amissah,
  All Rights Reserved.

** License: GPL 3 or later:

  SiSU, a framework for document structuring, publishing and search

  Copyright (C) Ralph Amissah

  This program is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program. If not, see [http://www.gnu.org/licenses/].

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
  [http://www.fsf.org/licensing/licenses/gpl.html]
  [http://www.gnu.org/licenses/gpl.html]

** SiSU uses:
  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

** Homepages:
  [http://www.jus.uio.no/sisu]
  [http://www.sisudoc.org]

** Git
  [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary]
  [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/sst_to_s_xml_sax.rb;hb=HEAD]

=end
# Simple XML (sax style) output: Convert reads the source paragraphs and hands
# them to Songsheet, which runs Scroll (builds the document parts and writes
# them through Output) and, depending on the verbosity/maintenance options,
# the Tidy and Rexml checks.
#
# NOTE(review): in this copy of the file the markup inside string literals,
# here-documents and some regular expressions appears to have been stripped
# (e.g. empty patterns `//`, replacement strings holding only `\1`, heredocs
# with blank lines). Those spots are kept exactly as found and marked
# FIXME(review); restore them from an intact copy before relying on the
# generated output.
module SiSU_SimpleXML_ModelSax
  require_relative 'se_hub_particulars'                 # se_hub_particulars.rb
    include SiSU_Particulars
  require_relative 'dp'                                 # dp.rb
    include SiSU_Param
  require_relative 'se'                                 # se.rb
    include SiSU_Env
  require_relative 'ao_doc_str'                         # ao_doc_str.rb
  require_relative 'xml_shared'                         # xml_shared.rb
    include SiSU_XML_Munge
  require_relative 'shared_sem'                         # shared_sem.rb
  require_relative 'xml_format'                         # xml_format.rb
    include SiSU_XML_Format
  require_relative 'rexml'                              # rexml.rb
    include SiSU_Rexml
  @@alt_id_count=0
  @@tablefoot=''
  # Entry point: reads the source file named by opt.fns and drives the
  # conversion.
  class Convert
    @@fns=nil
    @@fns_array=[] # initialised here so #read never meets an undefined class variable
    # opt:: command/options object; #act, #fns and #cmd are read from it below
    def initialize(opt)
      @opt=opt
      @particulars=SiSU_Particulars::CombinedSingleton.instance.get_env_md(opt)
    end
    # Announces the conversion (unless quiet), loads the paragraphs of the
    # source file (cached per file name in @@fns_array) and runs Songsheet.
    # Any error is handed to SiSU_Errors::Rescued rather than propagated.
    def read
      begin
        @md=@particulars.md #bug, relies on info persistence, assumes -m has previously been run
        @env=@particulars.env
        SiSU_Screen::Ansi.new(
          @opt.act[:color_state][:set],
          'invert',
          'XML SAX',
          "#{@md.fns} -> #{@md.fn[:sxs]}"
        ).colorize unless @opt.act[:quiet][:set]==:on
        if (@opt.act[:verbose_plus][:set]==:on \
        || @opt.act[:maintenance][:set]==:on)
          SiSU_Screen::Ansi.new(
            @opt.act[:color_state][:set],
            @opt.fns,
            "#{Dir.pwd}/#{@md.fn[:sxs]}"
          ).flow
        end
        unless @@fns==@opt.fns
          @@fns=@opt.fns
          @@fns_array=[]
        end
        @fns_array=if @@fns_array.empty?; read_fnm
        else @@fns_array.dup #check
        end
        SiSU_SimpleXML_ModelSax::Convert::Songsheet.new(@fns_array,@particulars).songsheet
      rescue
        SiSU_Errors::Rescued.new($!,$@,@opt.cmd,@opt.fns).location do
          __LINE__.to_s + ':' + __FILE__
        end
      ensure #file closed in songsheet
      end
    end
    # Reads "#{Dir.pwd}/#{@opt.fns}" split on blank lines ("\n\n" separator).
    # Returns an Array of paragraph strings; when the file does not exist,
    # prints 'Error' on STDERR and returns an empty Array (previously the nil
    # result of STDERR.puts leaked out as the return value).
    def read_fnm
      ao=[]
      if FileTest.file?("#{Dir.pwd}/#{@opt.fns}")
        ao=IO.readlines("#{Dir.pwd}/#{@opt.fns}","\n\n")
      else STDERR.puts 'Error'
      end
      ao
    end
    private # note: `private` affects methods only, the nested classes below remain reachable
    # Runs the Scroll build and the optional well-formedness checks.
    class Songsheet
      def initialize(data,particulars)
        @data,@particulars,@env,@md=data,particulars,particulars.env,particulars.md
      end
      # Builds and writes the document; runs Tidy when verbose, verbose_plus or
      # maintenance is on, and the Rexml parse when maintenance is on. Errors
      # are handed to SiSU_Errors::Rescued.
      def songsheet
        begin
          SiSU_SimpleXML_ModelSax::Convert::Scroll.new(@data,@particulars).songsheet
          if (@md.opt.act[:verbose][:set]==:on \
          || @md.opt.act[:verbose_plus][:set]==:on \
          || @md.opt.act[:maintenance][:set]==:on)
            SiSU_SimpleXML_ModelSax::Convert::Tidy.new(@md,@env).xml # test wellformedness, comment out when not in use
          end
          SiSU_Rexml::Rexml.new(@md,@md.fn[:sxs]).xml if @md.opt.act[:maintenance][:set]==:on # test rexml parsing, comment out when not in use #debug
        rescue
          SiSU_Errors::Rescued.new($!,$@,@md.opt.cmd,@md.fns).location do
            __LINE__.to_s + ':' + __FILE__
          end
        ensure
        end
      end
    end
    # Accumulates the document parts in the class-level @@xml hash
    # (:open, :head, :body, :sc, :close) and publishes them through Output.
    class Scroll
      require_relative 'txt_shared'                     # txt_shared.rb
      require_relative 'css'                            # css.rb
        include SiSU_TextUtils
      @@xml={ body: [], open: [], close: [], head: [] }
      # data:: the paragraphs to convert (iterated with #each in #markup)
      # particulars:: object answering #env and #md (the '' default does not)
      def initialize(data='',particulars='')
        @data,@env,@md=data,particulars.env,particulars.md
        @regx=/^(?:#{Mx[:mk_o]}:p[bn]#{Mx[:mk_c]}\s*)?(?:#{Mx[:lv_o]}[1-9]:(\S*)#{Mx[:lv_c]})?(.+)/
        @tab="\t"
        if @md
          @trans=SiSU_XML_Munge::Trans.new(@md)
        end
        @sys=SiSU_Env::SystemCall.new
      end
      # Runs the four build stages in order.
      def songsheet
        pre
        markup
        post
        publish
      end
      protected
      # Rewrites inline endnote markers ~{ ... }~ and ~[* ... ]~ in place.
      # FIXME(review): the replacement strings hold only the captured text plus
      # a space; any surrounding markup was probably lost in this copy.
      def embedded_endnotes(para='')
        para.gsub!(/~\{(.+?)\}~/,'\1 ')
        para.gsub!(/~\[([*+])\s+(.+?)\]~/,'\2 ')
      end
      # Appends one header entry, built from meta.el and meta.text, to
      # @@xml[:head]. Note that meta.text is modified in place.
      def xml_head(meta)
        txt=meta.text
        # FIXME(review): the four replacements below only drop the font markers;
        # whatever wrapped \1 is missing in this copy.
        txt.gsub!(/\/{(.+?)}\//,'\1')
        txt.gsub!(/[*!]{(.+?)}[*!]/,'\1')
        txt.gsub!(/_{(.+?)}_/,'\1')
        txt.gsub!(/-{(.+?)}-/,'\1')
        # FIXME(review): pattern is empty and the replacement is a bare newline
        # in this copy; as written this matches between every character.
        txt.gsub!(//,"\n")
        txt.gsub!(/ & /,' and ')
        # FIXME(review): heredoc reconstructed from the surviving fragments; the
        # lines that now hold only tabs presumably carried tags.
        @@xml[:head] << <<WOK
#{@tab*2}<#{meta.el}>
#{@tab*3}#{txt}
#{@tab*2}
#{@tab}
WOK
      end
      # Stores the source-control details of @md (filename, number, date) in
      # @@xml[:sc], or '' when @md.sc_info is not set. The md argument is unused.
      def xml_sc(md='')
        # FIXME(review): heredoc reconstructed; surrounding markup is missing.
        sc=if @md.sc_info
          <<WOK
#{@md.sc_filename}
#{@md.sc_number}
#{@md.sc_date}
WOK
        else ''
        end
        @@xml[:sc]=sc
      end
      # Appends one paragraph or heading to @@xml[:body]; the text is taken from
      # the second capture of @regx and wrapped at 70 columns.
      # lv:: heading level; converted with to_i, 0 is treated as "not a heading"
      # hname:: unused in the code visible here
      # FIXME(review): the string literals below contain only tabs/newlines in
      # this copy; the element markup is missing.
      def xml_structure(para='',lv='',hname='') #extracted endnotes
        lv=lv.to_i
        lv=nil if lv==0
        embedded_endnotes(para)
        if para[@regx]
          paragraph="#{para[@regx,2]}"
          util=SiSU_TextUtils::Wrap.new(paragraph,70)
          wrapped=util.line_wrap
        end
        @@xml[:body] << "#{@tab*0}" if para[@regx]
        @@xml[:body] << "#{@tab*1}" << "\n" if para[@regx]
        @@xml[:body] << if lv; %{#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n} << "\n"
        elsif wrapped =~/\A%%?\s+/; %{\n} # comments
        else %{#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n} # main text, contents, body KEEP
        end
        @@xml[:body] << "#{@endnotes}" if @endnotes # main text, endnotes KEEP
        @@xml[:body] << "#{@tab*0}" << "\n" if para[@regx]
        @endnotes=[]
      end
      # Appends a <:block> text object to @@xml[:body] (block markers removed,
      # text stripped in place). FIXME(review): element markup missing.
      def block_structure(para='')
        para.gsub!(/<:block(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*0}}
        @@xml[:body] << %{#{@tab*1}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*1}\n}
        @@xml[:body] << "#{@tab*0}"
      end
      # Appends a <:group> text object to @@xml[:body] (group markers removed,
      # text stripped in place). FIXME(review): element markup missing.
      def group_structure(para='')
        para.gsub!(/<:group(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*0}}
        @@xml[:body] << %{#{@tab*1}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*1}\n}
        @@xml[:body] << "#{@tab*0}"
      end
      # Appends a <:verse> text object to @@xml[:body] (verse markers removed,
      # text stripped in place). FIXME(review): element markup missing.
      def poem_structure(para='')
        para.gsub!(/<:verse(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*0}}
        @@xml[:body] << %{#{@tab*1}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*1}\n}
        @@xml[:body] << "#{@tab*0}" << "\n"
      end
      # Appends a <:code> text object to @@xml[:body] (code markers removed,
      # text stripped in place). FIXME(review): element markup missing.
      def code_structure(para='')
        para.gsub!(/<:code(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*0}}
        @@xml[:body] << %{#{@tab*1}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*1}\n}
        @@xml[:body] << "#{@tab*0}" << "\n"
      end
      # Appends a table to @@xml[:body] and resets @endnotes.
      # FIXME(review): element markup missing.
      def table_structure(table='') #tables
        @@xml[:body] << %{#{@tab*0}}
        @@xml[:body] << %{#{@tab*1}#{table}\n#{@tab*1}\n} # unless lv # main text, contents, body KEEP
        @@xml[:body] << "#{@tab*0}" << "\n" #if para[@regx]
        @endnotes=[]
      end
      # Visits every word that does not already contain an &...; entity.
      # FIXME(review): the replacement equals the pattern in this copy, so the
      # substitution is a no-op; presumably it escaped ampersands.
      def tidywords(wordlist)
        wordlist.each do |x|
          x.gsub!(/&/,'&') unless x =~/&\S+;/
        end
      end
      # Removes Mx[:gl_o]N:name Mx[:gl_c] marks from para in place; returns para.
      def xml_clean(para)
        para.gsub!(/#{Mx[:gl_o]}[1-9]:\S*?#{Mx[:gl_c]}/,'') #Danger, watch
        para
      end
      # Runs every source paragraph through the document-structure extraction
      # and the light markup translation, sends header lines to #xml_head and
      # dispatches the rest by format to the *_structure methods.
      def markup
        data=[]
        xml_sc(@md)
        @endnotes,@level,@cont,@copen,@xml_contents_close=[],[],[],[],[]
        @rcdc=false
        (0..6).each { |x| @cont[x]=@level[x]=false }
        (4..6).each { |x| @xml_contents_close[x]='' }
        @data.each do |para|
          data << SiSU_AO_DocumentStructureExtract::Structure.new(@md,para).structure #takes on Mx marks
        end
        data.each do |para|
          if para !~/^\s*(?:%+ |<:code>)/
            if @md.sem_tag and para =~/[:;]\{|\}[:;]/
              para=@trans.xml_semantic_tags(para)
            end
            if para =~/[:;]\{|\}[:;]/
              para=SiSU_Sem::Tags.new(para,@md).rm.all
            end
          end
          para=@trans.markup_light(para)
          @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8
          para.gsub!(/^@(\S+?):/,"#{Mx[:lv_o]}@\\1#{Mx[:lv_c]}")
          if para =~/\A#{Mx[:lv_o]}@(\S+?)#{Mx[:lv_c]}\s*(.+?)\Z/m # for headers
            d_meta=SiSU_TextUtils::HeaderScan.new(@md,para).meta
            if d_meta; xml_head(d_meta)
            end
          end
          para='' if para=~/#{Mx[:lv_o]}@\S+?#{Mx[:lv_c]}/
          if @rcdc==false \
          and (para =~/~metadata/ or para =~/^1~meta\s+Document Information/)
            @rcdc=true
          end
          # FIXME(review): the last alternatives of this pattern are empty in
          # this copy, so it matches every string and the branch never runs.
          if para !~/(^@\S+?:|^\s*$||)/
            @sto=SiSU_text_parts::SplitTextObject.new(@md,para).lev_segname_para unless @rcdc
            SiSU_XML_Format::FormatScroll.new(@md,@sto.text) if @sto.format =~/i[1-9]|ordinary/
            case @sto.format
            when /^(1):(\S*)/
              xml_clean(para)
              xml_structure(para,$1,$2)
              para=@sto.lev_para_ocn.heading_body1
            when /^(2):(\S*)/
              xml_clean(para)
              xml_structure(para,$1,$2)
              para=@sto.lev_para_ocn.heading_body2
            when /^(3):(\S*)/
              xml_clean(para)
              xml_structure(para,$1,$2)
              para=@sto.lev_para_ocn.heading_body3
            when /^(4):(\S*)/ # work on see SplitTextObject
              xml_clean(para)
              xml_structure(para,$1,$2)
              para=@sto.lev_para_ocn.heading_body4
            when /^(5):(\S*)/
              xml_clean(para)
              xml_structure(para,$1,$2)
              para=@sto.lev_para_ocn.heading_body5
            when /^(6):(\S*)/
              xml_clean(para)
              xml_structure(para,$1,$2)
              para=@sto.lev_para_ocn.heading_body6
            else
              if para =~ /<:verse>/
                para=poem_structure(para)
              elsif para =~ /<:group>/
                para=group_structure(para)
              elsif para =~ /<:code>/
                # FIXME(review): pattern is empty in this copy (presumably this
                # escaped angle brackets); as written it matches between every
                # character.
                para.gsub!(//,'>')
                para=code_structure(para)
              # FIXME(review): first pattern is empty in this copy
              elsif para =~// \
              and para =~/^(-\{{2}~\d+|)/ # -endnote
                para=''
              end
              if para =~/.*<:#>.*$/
                para=case para
                when /<:i1>/
                  format_text=FormatTextObject.new(para,'')
                  format_text.scr_inden_ocn_e_no_paranum
                when /<:i2>/
                  format_text=FormatTextObject.new(para,'')
                  format_text.scr_inden_ocn_e_no_paranum
                end
              end
              if para =~/<:center>/
                one,two=/(.*)<:center>(.*)/.match(para)[1,2]
                format_text=FormatTextObject.new(one,two)
                para=format_text.center
              end
            end
            para.gsub!(/<:\S+?>/,'')
            # FIXME(review): pattern is empty in this copy
            para.gsub!(//,'') ## Clean Prepared Text #bugwatch reinstate
            para
          end
          para
        end
        # close whatever levels are flagged open, deepest first
        # FIXME(review): the closing markup is missing from these strings
        6.downto(4) do |x|
          y=x - 1
          @@xml[:body] << "#{@tab*5}\n#{@tab*y}\n" if @level[x]==true
        end
        3.downto(1) do |x|
          y=x - 1
          @@xml[:body] << "#{@tab*y}\n" if @level[x]==true
        end
      end
      # Resets :head and :body and sets the document opening in @@xml[:open].
      def pre
        rdf=SiSU_XML_Tags::RDF.new(@md)
        dir=SiSU_Env::InfoEnv.new
        @@xml[:head],@@xml[:body]=[],[]
        css=SiSU_Env::CSS_Select.new(@md).xml_sax
        # FIXME(review): both branches are empty strings in this copy
        encoding=if @sys.locale =~/utf-?8/i then ''
        else ''
        end
        # FIXME(review): heredoc reconstructed; only this line survives, which
        # leaves `dir`, `css` and `encoding` without a visible use.
        @@xml[:open] =<<WOK
#{rdf.comment_xml_sax}
WOK
        @@xml[:head] << "\n"
        @@xml[:body] << "\n"
      end
      # Appends the source-control block to the head and terminates the parts.
      # FIXME(review): the closing markup is missing from these strings.
      def post
        @@xml[:head] << @@xml[:sc]
        @@xml[:head] << "\n"
        @@xml[:body] << "\n"
        @@xml[:close] = "\n"
      end
      # Joins the parts in output order, writes them through Output and then
      # clears @@xml. Parts that were never set are nil and join as nothing.
      def publish
        content=[]
        content << @@xml[:open] << @@xml[:head] << @@xml[:body] << @@xml[:metadata]
        content << @@xml[:owner_details] if @md.stmp =~/\w\w/
        content << @@xml[:tail] << @@xml[:close]
        Output.new(content.join,@md).xml
        @@xml={}
      end
    end
    # Writes the assembled document to the file named by @md.fn[:sxs].
    class Output
      # data:: the document, either one String (what Scroll#publish passes) or
      #        an Array of strings
      def initialize(data,md)
        @data,@md=data,md
      end
      # Removes leftover <:...> marks, drops empty lines and writes the rest
      # line by line; prints a notice when no File could be obtained.
      def xml
        @sisu=[]
        # String has no #each (Ruby 1.9 and later), so wrap the data to accept
        # both the joined String and an Array; nil entries are skipped
        [@data].flatten.compact.each do |para|
          para=para.gsub(/<:\S+?>/,'')
          para=para.gsub(//,'') # FIXME(review): pattern is empty in this copy
          para="#{para}\n" unless para.empty?
          @sisu << para
        end
        new_file_data=@sisu.join
        @sisu=new_file_data.scan(/.+/)
        SiSU_Env::FileOp.new(@md).mkdir
        filename_sxm=SiSU_Env::FileOp.new(@md,@md.fn[:sxs]).mkfile_pwd
        if filename_sxm.is_a?(File)
          begin
            @sisu.each {|para| filename_sxm.puts para}
          ensure
            filename_sxm.close # close even when a write fails
          end
        else puts 'file not created, is directory writable?'
        end
      end
    end
    # Optional well-formedness check of the written file.
    class Tidy
      def initialize(md,dir)
        @md,@env=md,dir
        @prog=SiSU_Env::InfoProgram.new
      end
      # Runs the check only when @prog.tidy is not false and verbose_plus or
      # maintenance is on.
      def xml
        if @prog.tidy !=false #note values can be other than true
          if (@md.opt.act[:verbose_plus][:set]==:on \
          || @md.opt.act[:maintenance][:set]==:on)
            SiSU_Screen::Ansi.new(
              @md.opt.act[:color_state][:set],
              'invert',
              'Using XML Tidy',
              'check document structure'
            ).colorize unless @md.opt.act[:quiet][:set]==:on
            # keep the instance: `tell` is used below and was never assigned
            tell=SiSU_Screen::Ansi.new(
              @md.opt.act[:color_state][:set],
              '',
              '',
              'check document structure'
            )
            tell.grey_open unless @md.opt.act[:quiet][:set]==:on
            tidyfile='/dev/null' #don't want one or screen output, check for alternative flags
            tidy =SiSU_Env::SystemCall.new("#{Dir.pwd}/#{@md.fn[:sxs]}",tidyfile)
            tidy.well_formed?
            tell.p_off unless @md.opt.act[:quiet][:set]==:on
          end
        end
      end
    end
  end
end
__END__