# coding: utf-8 =begin * Name: SiSU * Description: a framework for document structuring, publishing and search * Author: Ralph Amissah * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. * License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . If you have Internet connection, the latest version of the GPL should be available at these locations: * SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system * Hompages: * Download: * Ralph Amissah ** Description: opendocument text generation =end module SiSU_ODF require "#{SiSU_lib}/particulars" # particulars.rb include SiSU_Particulars require "#{SiSU_lib}/dal" # dal.rb require "#{SiSU_lib}/sysenv" # sysenv.rb include SiSU_Env include SiSU_Viz require "#{SiSU_lib}/odf_format" # odf_format.rb include SiSU_ODF_format require "#{SiSU_lib}/shared_metadata" # shared_metadata.rb require "#{SiSU_lib}/shared_txt" # shared_txt.rb @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 class Source require 'zlib' require 'find' require 'fileutils' include FileUtils def initialize(opt) @opt=opt @particulars=SiSU_Particulars::Combined_singleton.instance.get_all(opt) @@endnotes_para=[] end def read begin @env,@md,@dal_array=@particulars.env,@particulars.md,@particulars.dal_array @env.odf_structure opendoc=@md.fn[:odf] path=@env.path.output_tell tool=if @opt.cmd =~/[MVv]/; "#{@env.program.odf_viewer} #{path}/#{@md.fnb}/#{opendoc}" else '' end tell=SiSU_Screen::Ansi.new(@opt.cmd,'Opendocument (ODF:ODT)',tool) tell.green_hi_blue unless @opt.cmd =~/q/ tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{opendoc}") tell.flow if @opt.cmd =~/[MV]/ my_make=SiSU_Env::Create_file.new(@opt.cmd,@opt.fns) SiSU_ODF::Source::Scroll.new(@particulars).songsheet SiSU_Env::Info_skin.new(@md).select rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error ensure end end private class Scroll [],:open=>[],:close=>[],:head=>[],:metadata=>[],:tail=>[],:endnotes=>[] } @@docstart=true @@fns=nil def initialize(particulars) @env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array @vz=SiSU_Env::Get_init.instance.skin @tab="\t" @brace_url=SiSU_Viz::Skin.new.url_decoration @br=if @md.cmd =~/M/; "\n" else '' end end def songsheet pre @data=markup(@dal_array) post publish end # Used for extraction of endnotes from paragraphs def extract_endnotes(dob='') notes=dob.obj.scan(/#{Mx[:en_a_o]}(\d+\s+.+?)#{Mx[:en_a_c]}/)[1] #FIX @n=[] notes.each do |n| #high cost to deal with
appropriately within odf, consider n=n.dup.to_s if n =~/#{Mx[:br_line]}/ fix=n.split(/#{Mx[:br_line]}/) #watch #added fix.each do |x| if x =~/\S+/; @n << x end end else @n << n end end end def odf_metadata @@odf[:metadata]=Metadata::Summary.new(@md).odf.metadata end def odf_tail generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version] lastdone="Last Generated on: #{Time.now}" rubyv="Ruby version: #{@md.ruby_version}" sc=if @md.sc_info "Source file: #{@md.sc_filename}\nVersion number: #{@md.sc_number}\nVersion date: #{@md.sc_date}\n" else '' end url=@md.fnb fn=@md.fn[:manifest] manifest="#{@vz.url_root_http}/#{url}/#{fn}" @@odf[:tail] << %{Available document outputs:
<#{manifest}>
} @@odf[:tail] << %{\nSiSU: <www.jus.uio.no/sisu> and <www.sisudoc.org>} @@odf[:tail] << "\n" end def heading(dob) dob.obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check m=/#{$1}/ breakpage='' if @md.fns \ and @md.fns != '' \ and @md.fns !=@@fns @@docstart=true @@fns=@md.fns end unless @@docstart breakpage=if (@md.pagenew or @md.pagebreak) \ and (@md.pagenew =~ m or @md.pagebreak =~m) ' ' else '' end end @@docstart=false dob.obj=%{#{breakpage}#{dob.obj}} dob end def image_src(i) image_source=if @md.fns =~/\.ss[tm]$/ \ and FileTest.file?("#{@env.path.image_source_local_tex}/#{i}") #review @env.path.image_source_local_tex elsif @md.fns =~/\.-ss[tm]$/ \ and FileTest.file?("#{@env.path.image_source_remote_tex}/#{i}") @env.path.image_source_remote_tex elsif FileTest.file?("#{@env.path.image_source_tex}/#{i}") @env.path.image_source_tex else tell=SiSU_Screen::Ansi.new(@md.cmd,"ERROR - image:",%{"#{i}" missing},"search locations: #{@env.path.image_source_local_tex},#{@env.path.image_source_remote_tex} and #{@env.path.image_source_tex}") tell.error2 unless @md.cmd =~/q/ nil end end def image_odf(img) # copy image to od image directory (unless exists) # divide pixel dimension by 37.79485 and retain 3 decimal places m,u=img[1],img[2] i=/^(\S+?\.(?:png|jpg|gif))/.match(m).captures.join if m =~/^(\S+?\.(?:png|jpg|gif))/ c=/^\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"/.match(m).captures.join if m =~/^\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"/ w,h=/(\d+)x(\d+)/.match(m).captures if m =~/\d+x\d+/ w=(w.to_i/37.79485).to_s h=(h.to_i/37.79485).to_s h=/([0-9]+\.\d{0,3})/.match(h).captures.join w=/([0-9]+\.\d{0,3})/.match(w).captures.join image_source=image_src(i) pwd=Dir.pwd cp("#{image_source}/#{i}","#{@env.path.odf}/Pictures/#{i}") if image_source img=if i.to_s =~/jpg|png|gif/ \ and h.to_s =~/\d/ \ and w.to_s =~/\d/ @@img_count +=1 %{#{c}} #anchor-type: as-char or paragraph or char or ... else %{[image omitted]} end end def image(dob) m=if dob.obj =~/#{Mx[:lnk_o]}[ ]*(.+?)[ ]*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/ dob.obj.scan(/(#{Mx[:lnk_o]}[ ]*(.+?)[ ]*#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]})/) elsif dob.obj =~/#{Mx[:lnk_o]}[ ]*(.+?)[ ]*#{Mx[:lnk_c]}image/ dob.obj.scan(/(#{Mx[:lnk_o]}[ ]*(.+?)[ ]*#{Mx[:lnk_c]}(image))/) else nil end if m; m.each do |i| cont,url=i[1],i[2] cont.gsub!(/([)(\]\[])/,"\\\\\\1") cont.gsub!(/([+?])/,"\\\\\\1") # incorrect handling of + url.gsub!(/([+?])/,"\\\\\\1") dob.obj.sub!(/#{Mx[:lnk_o]}[ ]*#{cont}[ ]*#{Mx[:lnk_c]}#{Mx[:url_o]}#{url}#{Mx[:url_c]}/m,image_odf(i)) #watch dob.obj.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix end m=nil end dob end def text_link_odf(txt,url,trail) txt.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-( url.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-( %{#{txt.strip}#{trail}} end def text_link(dob) m=dob.obj.scan(/(#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]})/) #sort if m m.each do |i| txt,url,trail=i[1],i[2] txt.gsub!(/([)(\]\[])/,"\\\\\\1") txt.gsub!(/([+?*])/,"\\\\\\1") # problems with + url.gsub!(/([+?])/,"\\\\\\1") # problems with + dob.obj.gsub!(/#{Mx[:lnk_o]}[ ]*#{txt}#{Mx[:lnk_c]}#{Mx[:url_o]}#{url}#{Mx[:url_c]}/m,text_link_odf(txt,url,trail)) #make sure trailing ']' are not caught in url dob.obj.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix end m=nil end dob end def normal(dob) #P1 - P3 dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/, '\1') #http ftp matches escaped, no decoration dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}) #http ftp matches with decoration dob.obj.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}) if dob.obj !~/http:\/\// # improve upon, document crash where url contains '@' symbol dob.obj= if dob.is=='para' and dob.indent.to_s =~/[0-9]/ # and t_o.bullet_==true %{#{dob.obj}} else %{#{dob.obj}} end dob end def fontface(dob) end def footnote(t_o) str=if defined? t_o.obj; t_o.obj elsif t_o.class==String; t_o end if str @astx||=10000 if str =~/#{Mx[:en_a_o]}\d+\s+/ str.gsub!(/#{Mx[:en_a_o]}(\d+)\s+(.+?)#{Mx[:en_a_c]}/,'\1 \2') end if str=~/#{Mx[:en_a_o]}[*+]+\s/ asterisk=str.scan(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/) asterisk.each do |x| a=x[0].gsub(/([*+])/,"\\\\\\1") str.gsub!(/#{Mx[:en_a_o]}(#{a})\s+(.+?)#{Mx[:en_a_c]}/,%{\\1 \\2}) @astx+=1 end end if str=~/#{Mx[:en_b_o]}[*+]\d+\s/ asterisk=str.scan(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)#{Mx[:en_b_c]}/) asterisk.each do |x| a=x[0].gsub(/([*+])/,"\\\\\\1") str.gsub!(/#{Mx[:en_b_o]}(#{a})\s+(.+?)#{Mx[:en_b_c]}/,%{\\1 \\2}) @astx+=1 end end end if defined? t_o.obj; t_o.obj=str elsif t_o.class==String; t_o=str end t_o end def group_clean(str) str.gsub!(/&nbsp;| |#{Mx[:nbsp]}/,' ') str.gsub!(//,'>') str.gsub!(/<(text:span text:style-name="T[1-5]"|\/text:span)>/,'<\1>') #works, not ideal str.gsub!(/#{Mx[:br_line]}/,'
') str.gsub!(/<br(?:\s+\/)?>/,'
') str end def poem(dob) #P4 #same as group parray=[] dob.obj.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/).each do |parablock| parablock=group_clean(parablock) parray << %{#{parablock}} if parablock =~/\S+/ end dob.obj=parray.join + '' dob end def group(dob) #P4 #same as verse parray=[] dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/, '\1') #http ftp matches escaped, no decoration dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}) #http ftp matches with decoration dob.obj.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}) if dob.obj !~/http:\/\// # improve upon, document crash where url contains '@' symbol dob.obj.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/).each do |parablock| parablock=group_clean(parablock) parablock.gsub!(/<text:a xlink:type="simple" xlink:href="(.+?)">/m,'') parablock.gsub!(/<(\/text:a)>/,'<\1>') parablock.gsub!(/<(text:note text:id=.+?)>/,'<\1>') parablock.gsub!(/<(text:p text:style-name="Footnote")>/,'<\1>') parablock.gsub!(/<(\/?text:(?:note-citation|note-body|note|p))>/,'<\1>') parablock=footnote(parablock) parray << %{#{parablock}} if parablock =~/\S+/ end dob.obj=parray.join + '' dob end def code(dob) #P5 if dob.is=='code' dob.obj.gsub!(/\s\s/,'  ') parray=[] dob.obj.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/).each do |parablock| parablock=group_clean(parablock) parablock.gsub!(/^\s*$/,'
') parablock.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/, '\1') #http ftp matches escaped, no decoration parray << %{#{parablock}} if parablock =~/\S+/ end dob.obj=parray.join + '' end dob end def table(dob) # if dob.is =='table' table=SiSU_ODF_format::Table.new(@md,dob) dob=table.table end dob end def odf_structure(md,dob) @md,@dob=md,dob dob=if dob.is !='code' dob=if dob.obj =~/#{Mx[:lnk_o]}[ ]*\S+?\.(?:png|jpg|gif)\s.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/; image(dob) elsif dob.obj =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/; text_link(dob) else dob end else dob end dob=footnote(dob) if dob.is=='heading' @@odf[:body] << heading(dob).obj << @br*2 elsif dob.is =='verse' @@odf[:body] << poem(dob).obj << @br*2 elsif dob.is=='group' @@odf[:body] << group(dob).obj << @br*2 elsif dob.is=='code' @@odf[:body] << code(dob).obj << @br*2 elsif dob.is=='table' #elsif dob.obj =~ /<\-_&!@%~#\]\[*=$| \n+`#{Mx[:tc_p]}]/u dir=SiSU_Env::Info_env.new(@md.fns) @data_mod,@endnotes,@level,@cont,@copen,@odf_contents_close=Array.new(6){[]} @rcdc=false (0..6).each { |x| @cont[x]=@level[x]=false } (4..6).each { |x| @odf_contents_close[x]='' } odf_tail #($1,$2) fix=[] bullet=image_src('bullet_09.png') cp("#{bullet}/bullet_09.png","#{@env.path.odf}/Pictures/.") #if image_src('bullet_09.png') odf_metadata data.each do |dob| #p dob.obj if dob.obj =~safe_characters and @md.cmd =~/V/ #KEEP dob.obj='' if dob.obj =~/#{Mx[:lv_o]}\d+:.*?#{Mx[:lv_c]}.+?#{Mx[:pa_non_object_dummy_heading]}/ #fix Mx[:lv_o] para_array=[] dob.obj.gsub!(//,'>') word=dob.obj.scan(/\S+|\n/) if word word.each do |w| # _ - / # | : ! ^ ~ unless dob =~/^(?:#{Rx[:meta]}|%+ )/m w.gsub!(/&#(?:126|152);/,'~') #126 usual if w !~/&\S{1,7};/ \ or w =~/ / w.gsub!(/&/,'&') #watch   end end para_array << w end dob.obj=para_array.join(' ') dob.obj=dob.obj.strip end if dob.is=='code' #{Mx[:gr_o]}code#{Mx[:gr_c]}/ #fix #code-block: angle brackets special characters #fix dob.obj.gsub!(/(^|[^}])_/m,'\1>') dob.obj.gsub!(/(^|[^}])_/m,'\1>') end if dob.of=='group' dob.obj.gsub!(/#{Mx[:gl_bullet]}/,'● ') end dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') dob.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') dob.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') dob.obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check dob.obj.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') dob.obj.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') dob.obj.gsub!(/#{Mx[:mk_o]}[~-]##{Mx[:mk_c]}/,'') if dob.is=='para' \ and dob.bullet_ dob.obj=' ' + dob.obj end dob.obj.gsub!(/#{Mx[:br_line]}/,'
') dob.obj.gsub!(/#{Mx[:br_page]}|#{Mx[:br_page_new]}/,' ') dob.obj.gsub!(/©/,'©') #too arbitrary dob.obj.gsub!(/.+?<-#>/,'') # remove dummy headings (used by html) #check dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') dob.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') dob.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') dob.obj.gsub!(/­/u,'-') dob.obj.gsub!(/ /u, ' ') # space identify dob.obj.gsub!(/ /u, ' ') # space identify dob.obj.gsub!(/·/u,'*') dob.obj.gsub!(/[­–—]/u,'-') #— – chk dob.obj.gsub!(/ < /i,'<') dob.obj.gsub!(/\\copy(?:right)?\b/,'©') dob.obj.gsub!(/\\trademark\b|\\tm\b/,'®') dob.obj.gsub!(/\44/,'$') #$ watch dob.obj.gsub!(/#{Mx[:br_page]}|#{Mx[:br_page_new]}/,'') # remove page breaks dob.obj.gsub!(/(.+?)<\/a>/,'\1') dob.obj.gsub!(/#{Mx[:mk_o]}name#\S+?#{Mx[:mk_c]}/,'') # remove name links wordlist=dob.obj.scan(/\S+/) dob.obj=tidywords(wordlist).join(' ').strip @rcdc=true if @rcdc==false \ and (dob.obj =~/~metadata/ or dob =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_x]}\s*Document Information/) #fix Mx[:lv_o] if dob.is !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ if defined? dob.ocn and dob.ocn =~/\d+/ @p_num=SiSU_ODF_format::Paragraph_number.new(dob.ocn) end if dob.is=~/heading|para|group|verse|code|table/ # extend, include other types odf_structure(@md,dob) end dob.obj.gsub!(//,' ') if dob.obj ## Clean Prepared Text dob.obj.gsub!(/#{Mx[:tc_o]}.+?#{Mx[:tc_c]}/,' ') if dob.obj ## CHECK Clean Prepared Text dob.obj.gsub!(/<:\S+>/,' ') if dob.obj ## Clean Prepared Text end end end def pre table=if @md.flag_tables %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} else '' end breakpage=if @md.pagenew \ or @md.pagebreak ' fo:break-before="page"' else '' end @@odf[:head]<<%{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{table}#{@br}} + %{#{@br}} + %{#{@br}} + # P1 %{#{@br}} + # P1 %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + # P1 %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{} end def post end def publish divider='=' content=[] data=@data content << @@odf[:open] content << @@odf[:head] content << @@odf[:body] content << @@odf[:metadata] content << @@odf[:tail] Output.new(content,@md,@env).odf @@odf[:head],@@odf[:body],@@odf[:tail],@@odf[:metadata]=[],[],[],[] end end class Output ● #bullet dob.obj.gsub!(/^(#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]})\s*#{Mx[:gl_bullet]}/,'\1 · ') #bullet dob.obj.gsub!(/^#{Mx[:gl_bullet]}/,'· ') #bullet