aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v0/odf.rb
diff options
context:
space:
mode:
author: Ralph Amissah <ralph.amissah@gmail.com> 2007-06-02 11:25:19 +0100
committer: Ralph Amissah <ralph.amissah@gmail.com> 2007-06-02 11:25:19 +0100
commit: d0f1974a7b93db754f70d013738e6ad7d16b4d24 (patch)
tree: 252594de9ed0f78fd398609b9a0f298eb5b56ccd /lib/sisu/v0/odf.rb
parent: sisu-0.52.7 + md5s (diff)
0.53.0, pre-build, see changelog, library naming changed for scm, placed under v0 (instead of 0.53)
Diffstat (limited to 'lib/sisu/v0/odf.rb')
-rw-r--r-- lib/sisu/v0/odf.rb 719
1 files changed, 719 insertions, 0 deletions
diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb
new file mode 100644
index 00000000..caf86a39
--- /dev/null
+++ b/lib/sisu/v0/odf.rb
@@ -0,0 +1,719 @@
+=begin
+ * Name: SiSU information Structuring Universe - Structured information, Serialized Units
+ * Author: Ralph Amissah
+ * http://www.jus.uio.no/sisu
+ * http://www.jus.uio.no/sisu/SiSU/download.html
+
+ * Description: opendocument text generation
+
+ * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah
+
+ * License: GPL 2 or later
+
+ Summary of GPL 2
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ http://www.fsf.org/licenses/gpl.html
+ http://www.gnu.org/copyleft/gpl.html
+ http://www.jus.uio.no/sisu/gpl2.fsf
+
+ SiSU was first released to the public on January 4th 2005
+
+ SiSU uses:
+
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ © Ralph Amissah 1997, current 2007.
+ All Rights Reserved.
+
+ * Notes: tidy -ascii index.xml >> index.tidy
+
+ * Ralph Amissah: ralph@amissah.com
+ ralph.amissah@gmail.com
+=end
+module SiSU_ODF
+ require "#{SiSU_lib}/dal"
+ require "#{SiSU_lib}/sysenv"
+ include SiSU_Env
+ include SiSU_Param
+ include SiSU_Viz
+ require "#{SiSU_lib}/odf_format"
+ include OD_format
+ require "#{SiSU_lib}/shared_txt"
+ @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 # module-level counters all start at 0; NOTE(review): @@alt_id_count appears twice in this parallel assignment, possibly a different name was intended for one slot - confirm
+ class Source
+ require 'zlib'
+ require 'find'
+ require 'fileutils'
+ include FileUtils
+ def initialize(opt) # opt: processing options object, kept for use by #read (which reads opt.cmd and opt.fns)
+ @opt=opt
+ @@endnotes_para=[] # the shared endnote buffer is emptied every time this initializer runs
+ end
+ def read # entry point: loads document parameters and environment, reports the ODF output on screen, then runs Scroll#songsheet; any error is handed to SiSU_Errors::Info_error
+ begin
+ @md=SiSU_Param::Parameters.new(@opt).get
+ @env=SiSU_Env::Info_env.new(@opt.fns)
+ @env.odf_structure # presumably prepares the ODF working directory layout - TODO confirm in sysenv
+ opendoc=@md.fn[:odf]
+ path=@env.path.output_tell
+ tool=if @opt.cmd =~/[MVv]/; "#{@env.program.odf_viewer} #{path}/#{@md.fnb}/#{opendoc}" # viewer command line is only built when cmd contains M, V or v
+ else ''
+ end
+ tell=SiSU_Screen::Ansi.new(@opt.cmd,'Opendocument (ODF:ODT)',tool)
+ tell.green_hi_blue unless @opt.cmd =~/q/ # q in cmd suppresses the banner
+ tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{opendoc}")
+ tell.flow if @opt.cmd =~/[MV]/
+ my_make=SiSU_Env::Create_file.new(@opt.cmd,@opt.fns) # NOTE(review): my_make is not referenced again in this method
+ @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
+ SiSU_ODF::Source::Scroll.new(@dal_array,@md).songsheet # performs the actual conversion
+ SiSU_Env::Info_skin.new(@md).select
+ rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error # receives the exception ($!) and its backtrace ($@)
+ ensure # intentionally empty
+ end
+ end
+ private
+ class Split_text_object <Source # splits one prepared paragraph into format marker, heading level, text and object citation number (ocn)
+ require "#{SiSU_lib}/odf_format"
+ include SiSU_Viz
+ include OD_format
+ @@dp=nil # digest pattern, cached here on first instantiation
+ @@alt_id_count=0 # counter used to build the substitute ids "x1", "x2", ... below
+ attr_reader :format,:lev,:text,:ocn,:lev_para_ocn
+ def initialize(para) # para: one paragraph string; numbered paragraphs end in a "<~ocn;..;..><digest:digest>" marker
+ @para=para
+ @format,@ocn='null','null' # the string 'null' (not nil) is the "nothing found" value
+ #@format,@ocn=nil,nil
+ @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern # digest pattern is interpolated into the regexes below
+ end
+ def lev_segname_para_ocn # parses @para, fills @format/@lev/@text/@ocn and @lev_para_ocn, and returns self
+ @text=nil
+ if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ # paragraph starts with a level marker ("N~") or a "<:...>" tag and ends with the ocn marker
+ if /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) # heading with a name attached to the level marker
+ @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,$5
+ elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) # heading with a bare level marker
+ @format,@lev,@text,@ocn=$1,$2,$3,$4
+ elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) # "<:tag>" formatted paragraph; the tag name becomes @format
+ @format,@text,@ocn=$1,$2,$3
+ elsif /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#@dp:#@dp>$/m.match(@para) # NOTE(review): [um] is a subset of \w, so the first branch above already matches anything this pattern matches; this branch looks unreachable - confirm
+ @@alt_id_count+=1
+ @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,"x#{@@alt_id_count}"
+ elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/m.match(@para) # NOTE(review): likewise appears to be shadowed by the second branch above - confirm
+ @@alt_id_count+=1
+ @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}"
+ end
+ else
+ if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) # plain numbered paragraph: text followed by the ocn marker
+ @text,@ocn=$1,$2
+ end
+ if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ # no ocn marker and no empty line
+ @text=/(.+?)/im.match(@para)[1] # NOTE(review): a lazy (.+?) with nothing after it captures only the first character of @para - confirm whether the whole paragraph was intended
+ end
+ if /^((\d)~(?:~\S+)?)\s+(.+)/m.match(@para) # level marker without a trailing ocn marker
+ @format,@lev,@text=$1,$2,$3
+ end
+ end
+ format=@format.dup # a copy is passed on so @format itself is not shared with the formatter
+ @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+ OD_format::Format_text_object.new(format,@text,@ocn)
+ else
+ OD_format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>") # NOTE(review): this is a double-quoted string, not a regex, so \d and \w are not pattern escapes here; the literal text is passed in place of an ocn - confirm intent
+ end
+ self
+ end
+ end
+ class Scroll <Source
+ require "#{SiSU_lib}/shared_txt"
+ @@img_count=0 # running number used in draw:name="graphicsN" by image_odf
+ @@odf={ :body=>[],:open=>[],:close=>[],:head=>[],:metadata=>[],:tail=>[],:endnotes=>[] } # output fragments collected per document part
+ @@docstart=true # true until the first heading of a document has been emitted (see heading)
+ @@fns,@@dp=nil,nil # last seen source file name and cached digest pattern
+ def initialize(data='',md='') # data: array of prepared paragraphs (iterated in markup); md: document parameter object (fns, cmd, ... are read from it)
+ @data,@md=data,md
+ @env=SiSU_Env::Info_env.new(@md.fns)
+ @vz=SiSU_Env::Get_init.instance.skin
+ @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern # digest pattern cached in the class variable after the first lookup
+ @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ # captures: 1 optional heading name, 2 text, 3 ocn
+ @serial=/\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>\s*/ # the ocn/digest marker, stripped from paragraphs before output
+ @tab="\t"
+ @br=if @md.cmd =~/M/; "\n" # line breaks between output fragments only when cmd contains M
+ else ''
+ end
+ end
+ def songsheet # runs the conversion steps in order: pre, markup, post, publish
+ pre
+ @data=markup # markup returns the array it iterated over
+ post # not defined in the portion of the file visible here
+ publish # not defined in the portion of the file visible here
+ end
+ # Used for extraction of endnotes from paragraphs
+ def extract_endnotes(para='')
+ notes=para.scan(/~\{(\d+\s+.+?)\s*<#@dp>\}~/)
+ @n=[]
+ notes.each do |n| #high cost to deal with <br> appropriately within odf, consider
+ n=n.dup.to_s
+ if n =~/<br(?: \/)?>/
+ fix=n.split(/<br(?: \/)?>/) #watch #added
+ fix.each do |x|
+ if x =~/\S+/; @n << x
+ end
+ end
+ else @n << n
+ end
+ end
+ end
+ def odf_metadata(meta) # placeholder: called from markup with scanned header metadata, currently does nothing with it
+ #meta.el,meta.txt,meta.type,meta.attrib
+ end
+ def odf_tail # appends the closing office:text/body/document-content tags to @@odf[:tail]
+ generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version] # NOTE(review): generator, lastdone, rubyv and sc are built but not written anywhere in this method
+ lastdone="Last Generated on: #{Time.now}"
+ rubyv="Ruby version: #{@md.ruby_version}"
+ sc=if @md.sc_info
+ "Source file: #{@md.sc_filename}\nVersion number: #{@md.sc_number}\nVersion date: #{@md.sc_date}\n"
+ else ''
+ end
+ @@odf[:tail] << "\n</office:text></office:body></office:document-content>"
+ end
+ def heading(para,no) # returns a <text:h> element for para at outline level no, optionally preceded by a page-break paragraph; para is modified in place
+ para.gsub!(@serial,'')
+ para.gsub!(/<:name#\S+?>/,'')
+ para.gsub!(/^([1-6])~\S*\s/,'') # strips the leading "N~name " level marker and captures the level digit
+ m=/#{$1}/ # pattern built from the digit captured on the previous line; NOTE(review): if that gsub! found no match $1 is nil and this becomes an empty pattern that matches any string
+ breakpage=''
+ if @md.fns and @md.fns != '' and @md.fns !=@@fns # a different source file name means a new document has started
+ @@docstart=true
+ @@fns=@md.fns
+ end
+ unless @@docstart # the first heading of a document never gets a page break
+ breakpage=if (@md.pagenew or @md.pagebreak) and (@md.pagenew =~ m or @md.pagebreak =~m); '<text:p text:style-name="P9"> </text:p>' # break only when this level is listed in pagenew/pagebreak
+ else ''
+ end
+ end
+ @@docstart=false
+ %{#{breakpage}<text:h text:style-name="Heading_20_#{no}" text:outline-level="#{no}">#{para}</text:h>}
+ end
+ def image_src(i) # returns the first image directory that contains file i, or nil (after reporting an error) when none does
+ image_source=if @md.fns =~/\._?ss[tm]$/ and FileTest.file?("#{@env.path.image_source_local_tex}/#{i}") # source names ending .sst/.ssm or ._sst/._ssm: try the local image directory first
+ @env.path.image_source_local_tex
+ elsif @md.fns =~/\.-ss[tm]$/ and FileTest.file?("#{@env.path.image_source_remote_tex}/#{i}") # source names ending .-sst/.-ssm: try the remote image directory
+ @env.path.image_source_remote_tex
+ elsif FileTest.file?("#{@env.path.image_source_tex}/#{i}") # fallback directory
+ @env.path.image_source_tex
+ else
+ tell=SiSU_Screen::Ansi.new(@md.cmd,"ERROR - image:",%{"#{i}" missing},"search locations: #{@env.path.image_source_local_tex},#{@env.path.image_source_remote_tex} and #{@env.path.image_source_tex}")
+ tell.error2 unless @md.cmd =~/q/ # q in cmd suppresses the message
+ nil
+ end
+ end
+ def image_odf(img) # img: one scan tuple from #image (img[1] is the text inside the braces); copies the picture and returns a draw:frame element, or an "[image omitted]" paragraph
+ # copy image to od image directory (unless exists)
+ # divide pixel dimension by 37.79485 and retain 3 decimal places
+ # x=str.scan(/(aa[^a]+)/)
+ m,u=img[1],img[2] # NOTE(review): u is not used below
+ i=/^(\S+?\.(?:png|jpg|gif))/.match(m).captures.join if m =~/^(\S+?\.(?:png|jpg|gif))/ # image file name
+ c=/^\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"/.match(m).captures.join if m =~/^\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"/ # quoted caption, if any
+ w,h=/(\d+)x(\d+)/.match(m).captures if m =~/\d+x\d+/ # "WIDTHxHEIGHT" in pixels; both stay nil when absent
+ w=(w.to_i/37.79485).to_s # nil.to_i is 0, so missing dimensions become "0.0"
+ h=(h.to_i/37.79485).to_s
+ h=/([0-9]+\.\d{0,3})/.match(h).captures.join # keep at most 3 decimal places
+ w=/([0-9]+\.\d{0,3})/.match(w).captures.join
+ image_source=image_src(i)
+ pwd=Dir.pwd # NOTE(review): pwd is not used below
+ cp("#{image_source}/#{i}","#{@env.path.odf}/Pictures/#{i}") if image_source # copy is skipped when the image was not found
+ img=if i.to_s =~/jpg|png|gif/ and h.to_s =~/\d/ and w.to_s =~/\d/
+ @@img_count +=1
+ %{<draw:frame draw:style-name="fr1" draw:name="graphics#{@@img_count}" text:anchor-type="as-char" svg:width="#{w}cm" svg:height="#{h}cm" draw:z-index="2"><draw:image xlink:href="Pictures/#{i}" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad"/></draw:frame>#{c}} #anchor-type: as-char or paragraph or char or ...
+ else %{<text:p text:style-name="P1">[image omitted]</text:p>}
+ end
+ end
+ def image(para) # replaces each "{ image.png ... }url-or-image" markup in para with the element from image_odf; modifies and returns para
+ para.gsub!(@serial,'')
+ m=para.scan(/(\{\s*(.+?)\}((?:https?|ftp)\S+|image))/) # each tuple: whole match, brace content, url or the word "image"
+ if m; m.each do |i| # scan always returns an array, so this condition is always true
+ cont,url=i[1],i[2]
+ cont.gsub!(/([)(\]\[])/,"\\\\\\1") # backslash-escape brackets so the content can be embedded in the regex below
+ cont.gsub!(/([+?])/,"\\\\\\1") # incorrect handling of +
+ url.gsub!(/([+?])/,"\\\\\\1")
+ para.sub!(/\{\s*#{cont}\}#{url}/m,image_odf(i)) #watch
+ para.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix
+ end
+ m=nil
+ end
+ para
+ end
+ def text_link_odf(txt,url) # returns a <text:a> hyperlink element; txt and url are modified in place
+ txt.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-( (undoes the backslash-escaping of + applied by text_link)
+ url.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-(
+ %{<text:a xlink:type="simple" xlink:href="#{url}">#{txt}</text:a>}
+ end
+ def text_link(para) # replaces each "{text}url" markup in para with a hyperlink element from text_link_odf; modifies and returns para
+ para.gsub!(@serial,'')
+ m=para.scan(/(\{([^}]+?)\}((?:https?|ftp)\S+))/) #sort (each tuple: whole match, link text, url)
+ if m # scan always returns an array, so this condition is always true
+ m.each do |i|
+ txt,url=i[1],i[2]
+ txt.gsub!(/([)(\]\[])/,"\\\\\\1") # backslash-escape brackets so the text can be embedded in the regex below
+ txt.gsub!(/([+?])/,"\\\\\\1") # problems with +
+ url.gsub!(/([+?])/,"\\\\\\1") # problems with +
+ para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url))
+ para.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix
+ end
+ m=nil
+ end
+ para
+ end
+ def normal(para) #P1 - P3: wraps an ordinary paragraph in <text:p>, choosing P2/P3 for <:i1>/<:i2> indents; para is modified in place
+ para.gsub!(@serial,'')
+ para.gsub!(/(^|\s)(https?:\/\/[^'">< ]+)/,'\1<text:a xlink:type="simple" xlink:href="\2">\2</text:a>') # bare http(s) urls become links
+ para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/,'<text:a xlink:type="simple" xlink:href="mailto:\1">\1</text:a>') # anything shaped like an e-mail address becomes a mailto link
+ par=case para
+ when /^<:i1>\s/m; para.gsub!(/^<:i1>\s/m,'')
+ %{<text:p text:style-name="P2">#{para}</text:p>}
+ when /^<:i2>\s/m; para.gsub!(/^<:i2>\s/m,'')
+ %{<text:p text:style-name="P3">#{para}</text:p>}
+ else %{<text:p text:style-name="P1">#{para}</text:p>} #%{<text:p text:style-name="Standard">#{para}</text:p>}
+ end
+ para=par #+ %{<text:p text:style-name="Standard"/>} (the assignment's value is the method's return value)
+ end
+ def fontface(para) # currently a no-op that returns nil; bold/italic conversion is done in markup instead
+ #para=para.gsub(/<b>(.+?)<\/b>/,%{<text:span text:style-name="T1">\\1</text:span>})
+ #para=para.gsub(/<i>(.+?)<\/i>/,%{<text:span text:style-name="T2">\\1</text:span>})
+ end
+ def footnote(para) # converts ~{N text}~, ~[*N text]~ / ~[+N text]~ and ~{* text}~ notes in para into <text:note> elements and returns the result
+ @astx||=10000 # id counter for non-numeric notes, starts at 10000
+ para.gsub!(/<#@dp>([}\]]~)/,'\1') # drop the digest that precedes a note's closing marker
+ para.gsub!(/<br \/><:i1>/,'<br />')
+ if para =~/~\{\d+\s+/ # numbered footnotes
+ para=para.gsub(/~\{(\d+)\s+(.+?)\}~/,'<text:note text:id="ftn\1" text:note-class="footnote"><text:note-citation>\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>')
+ end
+ if para=~/~\[[*+]\d+\s/ #editor notes, square bracket series
+ asterisk=para.scan(/~\[([*+]\d+)\s+(.+?)\]~/)
+ asterisk.each do |x|
+ a=x[0].gsub(/([*+])/,"\\\\\\1") # escape * and + so the label can be embedded in the regex below
+ para=para.gsub(/~\[(#{a})\s+(.+?)\]~/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>})
+ @astx+=1
+ end
+ end
+ if para=~/~\{[*+]+\s/ # asterisk/plus labelled notes in curly brackets
+ asterisk=para.scan(/~\{([*+]+)\s+(.+?)\}~/)
+ asterisk.each do |x|
+ a=x[0].gsub(/([*+])/,"\\\\\\1") # escape * and + so the label can be embedded in the regex below
+ para=para.gsub(/~\{(#{a})\s+(.+?)\}~/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>})
+ @astx+=1
+ end
+ end
+ para
+ end
+ def group_clean(para) # escapes angle brackets for XML output while keeping <br /> line breaks; modifies and returns para
+ para.gsub!(/&amp;nbsp;|&nbsp;/,'&#160;') # non-breaking space as a numeric character reference
+ para.gsub!(/</,'&lt;')
+ para.gsub!(/>/,'&gt;')
+ #para.gsub!(/<br(?:\s+\/)?>/,'<br />')
+ para.gsub!(/&lt;br(?:\s+\/)?&gt;/,'<br />') # restore the line breaks that were escaped by the two lines above
+ #para.gsub!(/\s\s/,'&#160;&#160;')
+ para
+ end
+ def poem(para) #P4 #same as group: returns one P4 paragraph per non-blank <br>-separated line, followed by an empty Standard paragraph
+ para.gsub!(@serial,'')
+ para.gsub!(/<:verse(?:-end)?>\s*/m,'') # remove the verse open/close tags
+ para=group_clean(para)
+ parray=[]
+ para.split(/<br(?: \/)?>/).each do |parablock|
+ parray << %{<text:p text:style-name="P4">#{parablock}</text:p>} if parablock =~/\S+/ # blank lines are dropped
+ end
+ para=parray.join + '<text:p text:style-name="Standard"/>'
+ end
+ def group(para) #P4 #same as verse: returns one P4 paragraph per non-blank <br>-separated line, followed by an empty Standard paragraph
+ para.gsub!(@serial,'')
+ para.gsub!(/<:group(?:-end)?>\s*/m,'') # remove the group open/close tags
+ para=group_clean(para)
+ parray=[]
+ para.split(/<br(?: \/)?>/).each do |parablock|
+ parray << %{<text:p text:style-name="P4">#{parablock}</text:p>} if parablock =~/\S+/ # blank lines are dropped
+ end
+ para=parray.join + '<text:p text:style-name="Standard"/>'
+ end
+ def code(para) #P5: returns the code block as P5 paragraphs followed by an empty Standard paragraph
+ para.gsub!(@serial,'')
+ para.gsub!(/<:code(?:-end)?>\s*/m,'') # remove the code open/close tags
+ para=group_clean(para)
+ para.gsub!(/\s\s/,'&#160;&#160;') # pairs of whitespace become non-breaking spaces
+ parray=[]
+ para.split(/<:br>/).each do |parablock| # NOTE(review): group_clean has already turned every "<" into "&lt;" (restoring only <br />), so "<:br>" may never be present here - confirm
+ parray << %{<text:p text:style-name="P5">#{parablock}</text:p>} if parablock =~/\S+/
+ end
+ para=parray.join + '<text:p text:style-name="Standard"/>'
+ end
+ def table(para) # returns the result of OD_format::Table#table_split for a table block; returns nil when para does not match the table marker
+ if para =~/<!Th?.+/ # tables come as single block
+ table=OD_format::Table.new(@md,para)
+ para=table.table_split
+ end
+ end
+ def odf_structure(para='',lv='',ocn='',hname='') #% converts one paragraph and appends it to @@odf[:body]; lv is the heading level (1-6) or nil/'' for non-headings; ocn and hname are accepted but not used in this method
+ lv=lv.to_i
+ n=lv - 1 # NOTE(review): n and n3 are not used below
+ n3=lv + 2
+ lv=nil if lv == 0 # nil and '' both convert to 0, meaning "not a heading"
+ #extract_endnotes(para)
+ #para=fontface(para)
+ para=if para =~/\{\s*\S+?\.(?:png|jpg|gif)\s.+?\}(?:(?:https?|ftp):\S+|image)/; image(para) # image markup takes precedence over plain links
+ elsif para =~/\{.+?\}(?:(?:https?|ftp):\S+|image)/; text_link(para)
+ else para
+ end
+ para=footnote(para)
+ if lv
+ @@odf[:body] << case lv # headings; a level outside 1-6 makes the case yield nil
+ when 1; heading(para,'1') << @br*2
+ when 2; heading(para,'2') << @br*2
+ when 3; heading(para,'3') << @br*2
+ when 4; heading(para,'4') << @br*2
+ when 5; heading(para,'5') << @br*2
+ when 6; heading(para,'6') << @br*2
+ end
+ elsif para =~ /<:verse(?:-end)?>/
+ @@odf[:body] << poem(para)
+ @@odf[:body] << @br*2
+ elsif para =~ /<:group(?:-end)?>/
+ @@odf[:body] << group(para)
+ @@odf[:body] << @br*2
+ elsif para =~ /<:code(?:-end)?>/
+ @@odf[:body] << code(para)
+ @@odf[:body] << @br*2
+ elsif para =~ /<!Th?/ #elsif para =~ /<!Th?¡/
+ @@odf[:body] << table(para)
+ @@odf[:body] << @br*2
+ else
+ @@odf[:body] << normal(para) # main text, contents, body KEEP
+ @@odf[:body] << @br*2
+ end
+ @@endnotes_para=[] # also the method's return value
+ end
+ # Used to clean words
+ def tidywords(wordlist) # currently returns wordlist unchanged; the per-word cleaning is commented out
+ #wordlist.each do |x|
+ # #x.gsub!(/&/,'&amp;') unless x =~/&\S+;/
+ #end
+ wordlist
+ end
+ def markup # Used for major markup instructions
+ data=@data
+ safe_characters=/[^a-zA-Z0-9}{\/?,."';:)(><\-_&!@%~#\]\[*=$| \n+`¡]/
+ dir=SiSU_Env::Info_env.new(@md.fns)
+ @data_mod,@endnotes,@level,@cont,@copen,@odf_contents_close=Array.new(6){[]}
+ @rcdc=false
+ (0..6).each { |x| @cont[x]=@level[x]=false }
+ (4..6).each { |x| @odf_contents_close[x]='' }
+ odf_tail #($1,$2)
+ fix=[]
+ bullet=image_src('bullet_red.png')
+ cp("#{bullet}/bullet_red.png","#{@env.path.odf}/Pictures/.") #if image_src('bullet_red.png')
+ data.each do |para|
+ #p para if para =~safe_characters and @md.cmd =~/V/ #KEEP
+ #para.gsub!(/&lt;(~\d+;(?:\w|[0-6]:)\d+;\w\d+)&gt;&lt;(#@dp:#@dp)&gt;/,'<\1><\2>')
+ para_array=[]
+ word=para.scan(/\S+|\n/)
+ if word
+ word.each do |w| # _ - / # | : ! ^ ~
+ unless para =~/^(?:0~|%+ )/m
+ w.gsub!(/&#(?:126|152);/,'~') #126 usual
+ if w !~/&\S{1,7};/ or w =~/&nbsp;/; w.gsub!(/&/,'&amp;') #watch &nbsp;
+ end
+ end
+ para_array << w
+ end
+ para=para_array.join(' ')
+ para=para.strip
+ end
+ para.gsub!(/^(<:i[12]>\s+)?_\*\s+/,'\\1<draw:frame draw:style-name="gr1" text:anchor-type="as-char" svg:width="0.25cm" svg:height="0.25cm" draw:z-index="2"><draw:image xlink:href="Pictures/bullet_red.png" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad"/></draw:frame> ') # bullet_red.png
+ #para.gsub!(/^_\*\s+/,'<text:span text:style-name="T6">·</text:span> ') #bullet
+ para.gsub!(/^(<:i[12]>)\s+_\*\s+/,'\1 <text:span text:style-name="T6">·</text:span> ') #bullet
+ para.gsub!(/<br>/,'<br />')
+ para.gsub!(/<:p[bn]>/,'<text:p text:style-name="P8"> </text:p>')
+ para.gsub!(/&#169;/,'©') #too arbitrary
+ para.gsub!(/.+?<-#>/,'') # remove dummy headings (used by html) #check
+ #para.gsub!(/_\*\s+/,'* ') # bullet markup, marked down
+ para.gsub!(/<b>(.+?)<\/b>/,'<text:span text:style-name="T1">\1</text:span>')
+ para.gsub!(/<i>(.+?)<\/i>/,'<text:span text:style-name="T2">\1</text:span>')
+ para.gsub!(/<u>(.+?)<\/u>/,'<text:span text:style-name="T3">\1</text:span>')
+ para.gsub!(/<sup>(.+?)<\/sup>/,'<text:span text:style-name="T4">\1</text:span>')
+ para.gsub!(/<sub>(.+?)<\/sub>/,'<text:span text:style-name="T5">\1</text:span>')
+ para.gsub!(/`/,"'")
+ para.gsub!(/­/,'-')
+ para.gsub!(/·/,'*')
+ para.gsub!(/[“”]/,'""')
+ para.gsub!(/[­–—]/,'-') #— – chk
+ para.gsub!(/ < /i,'&#060;')
+ para.gsub!(/\\copy(?:right)?\b/,'&#169;')
+ para.gsub!(/\\trademark\b|\\tm\b/,'&#174;')
+ #para.gsub!(/\43/,'&#35;') ## watch
+ #para.gsub!(/$/,'&#36;') #$ watch
+ para.gsub!(/\44/,'&#36;') #$ watch
+ #para.gsub!(/^·/,'_*') #$ watch
+ #para.gsub!(/·/,'*') #$ watch
+ para.gsub!(/<:p[bn]>/,'') # remove page breaks
+ para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>/,'') # remove empty lines
+ para.gsub!(/<a href=".+?">(.+?)<\/a>/,'\1')
+ para.gsub!(/<:name#\S+?>/,'') # remove name links
+# para.gsub!(/<a href=".+?">(.+?)<\/a>/im,'\1')
+ #para.gsub!(/&nbsp;/,' ') # decide on
+ #para.gsub!(/\{(\S+?\.(?:png|jpg)) .+?\}(?:http:\/\/\S+|image)/," [ \\1 ]") #"[ #{@env.url.images_local}\/\\1 ]")
+ #para.gsub!(/<!TZ.+/,'')
+ #para.gsub!(/^<!T.+/,"#@br[table: ] <~#>");
+ #p para if para =~safe_characters and @md.cmd =~/M/ #KEEP
+ #para.gsub!(/^(\{\S+?\.(?:png|jpg)\s+.+?"(.*?)"\s*\}\S+)/,"\\1 \n [image: \"\\2\"]")
+ wordlist=para.scan(/\S+/)
+ para=tidywords(wordlist).join(' ').strip
+ if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers
+ d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta
+ if d_meta; odf_metadata(d_meta)
+ end
+ end
+ @rcdc=true if @rcdc==false and (para =~/~metadata/ or para =~/1~\s+Document Information/)
+ if para !~/(^0~|<ENDNOTES>|<EOF>)/
+ if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change
+ paranum=para[@regx,3]
+ @p_num=OD_format::Paragraph_number.new(paranum)
+ end
+ @sto=Split_text_object.new(para).lev_segname_para_ocn
+ #<office:annotation><dc:date>yyyy-mm-ddT00:00:00</dc:date><text:p>#{ocn}</text:p></office:annotation> #followed immediately by paragraph closure
+ ### problem in scroll, it appears tables are getting paragraph numbers
+ unless @rcdc
+ m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+ if para =~m and para=~/\S+/
+ para=case @sto.format
+ when /^(1)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2)
+ para
+ when /^(2)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2)
+ para
+ when /^(3)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2)
+ para
+ when /^(4)~(\S+)/; odf_structure(para,$1,@sto.ocn,$2)
+ # work on see SiSU_text_parts::Split_text_object
+ para
+ when /^(5)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2)
+ para
+ when /^(6)~(?:(\S+))?/; odf_structure(para,$1,@sto.ocn,$2)
+ para
+ #@sto.lev_para_ocn.heading_body6
+ #when /^(i1)$/
+ # #formatMono.gsubBody
+ # #para=@sto[:lev_para_ocn].scrIndent1
+ #when /^(i2)$/
+ # formatMono.gsubBody
+ # para=@sto[:lev_para_ocn].scrIndent2
+ #when /^(center)$/
+ # para.gsub!(/(.+)/,
+ # %{<center>(\\1)</center>})
+ # para=@sto[:lev_para_ocn].scrPara
+ #when /^(b|bold)$/
+ # para.gsub!(/(.+)/,
+ # %{<b>(\\1)</b>})
+ # para=@sto[:lev_para_ocn].scrPara
+ #when /null/ # see whether u can improve
+ # if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/)
+ # #formatMono.gsubBody
+ # #para=@sto[:lev_para_ocn].scrPara
+ # end
+ else odf_structure(para,nil,nil,nil) #watch may be problematic
+ para
+ end
+ elsif para =~/(.*)<!#!>(.*)/
+ one,two=$1,$2
+ format_text=OD_format::Format_text_object.new(one,two)
+ para=format_text.seg_no_paranum
+ end
+ para='' if para =~/<a name="n\d+">/ and para =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/ # -endnote
+ case para #remove
+ when /<:i1>/
+ if para =~/.*<:#>.*$/
+ format_text=OD_format::Format_text_object.new(para,'')
+ para=format_text.scr_indent_one_no_paranum
+ end
+ when /<:i2>/
+ if para =~/.*<:#>.*$/
+ format_text=OD_format::Format_text_object.new(para,'')
+ para=format_text.scr_indent_one_no_paranum
+ end
+ end
+ if (para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/)
+ # i don't get the condition for no paranum
+ end
+ if para =~/<:center>/
+ one,two=/(.*)<:center>(.*)/.match(para)[1,2]
+ format_text=OD_format::Format_text_object.new(one,two)
+ para=format_text.center
+ end
+ else
+ if para =~ /^(4)~(\S+)/
+ odf_structure(para,$1,@sto.ocn,$2)
+ para
+ elsif para =~/<~(\d+);m\d+;[mdv]\d+><#@dp:#@dp>$/
+ odf_structure(para,nil,nil,nil) #watch may be problematic
+ para
+ end
+ end
+ para.gsub!(/<!.+!>/,' ') if para ## Clean Prepared Text
+ para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text
+ end
+ end
+ end
+ def pre
+ table=if @md.flag_tables
+ %{<style:style style:name="Table1" style:family="table"><style:table-properties style:width="16.999cm" table:align="margins"/></style:style>#@br} +
+ %{<style:style style:name="Table1.A" style:family="table-column"><style:table-column-properties style:column-width="16.999cm" style:rel-column-width="65535*"/></style:style>#@br} +
+ %{<style:style style:name="Table1.B" style:family="table-column"><style:table-column-properties style:column-width="8.499cm" style:rel-column-width="32767*"/></style:style>#@br} +
+ %{<style:style style:name="Table1.C" style:family="table-column"><style:table-column-properties style:column-width="5.666cm" style:rel-column-width="21845*"/></style:style>#@br} +
+ %{<style:style style:name="Table1.D" style:family="table-column"><style:table-column-properties style:column-width="4.349cm" style:rel-column-width="16383*"/></style:style>#@br} +
+ %{<style:style style:name="Table1.E" style:family="table-column"><style:table-column-properties style:column-width="3.399cm" style:rel-column-width="13107*"/></style:style>#@br} +
+ %{<style:style style:name="Table1.F" style:family="table-column"><style:table-column-properties style:column-width="2.833cm" style:rel-column-width="10922*"/></style:style>#@br} +
+ %{<style:style style:name="Table1.G" style:family="table-column"><style:table-column-properties style:column-width="2.428cm" style:rel-column-width="9362*"/></style:style>#@br} +
+ %{<style:style style:name="Table1.H" style:family="table-column"><style:table-column-properties style:column-width="2.124cm" style:rel-column-width="8191*"/></style:style>#@br} +
+ %{<style:style style:name="Table2" style:family="table"><style:table-properties style:width="16.999cm" table:align="margins"/></style:style>#@br} +
+ %{<style:style style:name="Table2.A" style:family="table-column"><style:table-column-properties style:column-width="16.999cm" style:rel-column-width="65535*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.B" style:family="table-column"><style:table-column-properties style:column-width="8.499cm" style:rel-column-width="32767*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.C" style:family="table-column"><style:table-column-properties style:column-width="5.666cm" style:rel-column-width="21845*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.D" style:family="table-column"><style:table-column-properties style:column-width="4.349cm" style:rel-column-width="16383*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.E" style:family="table-column"><style:table-column-properties style:column-width="3.999cm" style:rel-column-width="13107*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.F" style:family="table-column"><style:table-column-properties style:column-width="2.833cm" style:rel-column-width="10922*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.G" style:family="table-column"><style:table-column-properties style:column-width="2.428cm" style:rel-column-width="9362*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.H" style:family="table-column"><style:table-column-properties style:column-width="2.124cm" style:rel-column-width="8191*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.I" style:family="table-column"><style:table-column-properties style:column-width="1.8887cm" style:rel-column-width="7281*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.J" style:family="table-column"><style:table-column-properties style:column-width="1.6999cm" style:rel-column-width="6553*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.K" style:family="table-column"><style:table-column-properties style:column-width="1.5453cm" style:rel-column-width="5957*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.L" style:family="table-column"><style:table-column-properties style:column-width="1.416cm" style:rel-column-width="5461*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.M" style:family="table-column"><style:table-column-properties style:column-width="1.307" style:rel-column-width="5041*"/></style:style>#@br} +
+ %{<style:style style:name="Table2.N" style:family="table-column"><style:table-column-properties style:column-width="1.214cm" style:rel-column-width="4681*"/></style:style>#@br}
+ else ''
+ end
+ breakpage=if @md.pagenew or @md.pagebreak; ' fo:break-before="page"'
+ else ''
+ end
+ @@odf[:head]<<%{<?xml version="1.0" encoding="UTF-8"?>#@br} +
+ %{<office:document-content xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:ooo="http://openoffice.org/2004/office" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" office:version="1.0"><office:scripts/>#@br} +
+ %{<office:font-face-decls><style:font-face style:name="HG Mincho Light J" svg:font-family="&apos;HG Mincho Light J&apos;, &apos;MS Mincho&apos;, &apos;HG Mincho J&apos;, &apos;HG Mincho L&apos;, &apos;HG Mincho&apos;, Mincho, &apos;MS PMincho&apos;, &apos;HG Mincho Light J&apos;, &apos;MS Gothic&apos;, &apos;HG Gothic J&apos;, &apos;HG Gothic B&apos;, &apos;HG Gothic&apos;, Gothic, &apos;MS PGothic&apos;, &apos;Andale Sans UI&apos;, &apos;Arial Unicode MS&apos;, &apos;Lucida Sans Unicode&apos;, Tahoma" style:font-pitch="variable"/><style:font-face style:name="Nimbus Sans L" svg:font-family="&apos;Nimbus Sans L&apos;" style:font-pitch="variable"/><style:font-face style:name="Tahoma" svg:font-family="Tahoma, Lucidasans, &apos;Lucida Sans&apos;, &apos;Arial Unicode MS&apos;" style:font-pitch="variable"/><style:font-face style:name="Nimbus Roman No9 L" svg:font-family="&apos;Nimbus Roman No9 L&apos;" style:font-family-generic="roman" style:font-pitch="variable"/><style:font-face style:name="Bitstream Vera Sans" svg:font-family="&apos;Bitstream Vera Sans&apos;" style:font-family-generic="swiss" style:font-pitch="variable"/></office:font-face-decls>#@br} +
+ %{<office:automatic-styles>#@br} +
+ %{#{table}#@br} +
+ %{<style:style style:name="P1" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:margin-top="0.199cm" fo:margin-bottom="0.199cm" fo:line-height="150%" fo:text-align="justify" style:justify-single-word="false"/></style:style>#@br} +
+ %{<style:style style:name="P2" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:margin-top="0.199cm" fo:margin-bottom="0.199cm" fo:line-height="150%" fo:margin-left="1cm" fo:margin-right="0cm" fo:text-align="justify" style:justify-single-word="false" fo:text-indent="0cm" style:auto-text-indent="false"/></style:style>#@br} +
+ %{<style:style style:name="P3" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:margin-top="0.199cm" fo:margin-bottom="0.199cm" fo:line-height="150%" fo:margin-left="2cm" fo:margin-right="0cm" fo:text-align="justify" style:justify-single-word="false" fo:text-indent="0cm" style:auto-text-indent="false"/></style:style>#@br} +
+ %{<style:style style:name="P4" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:margin-top="0cm" fo:margin-bottom="0cm" fo:line-height="100%" fo:text-align="justify" style:justify-single-word="false"/></style:style>#@br} +
+ %{<style:style style:name="P5" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:margin-top="0cm" fo:margin-bottom="0cm" fo:line-height="100%" fo:text-align="start" style:justify-single-word="false"/></style:style>#@br} +
+ %{<style:style style:name="P6" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:break-before="page"/></style:style>#@br} +
+ %{<style:style style:name="P7" style:family="paragraph" style:parent-style-name="Table_20_Contents"><style:paragraph-properties fo:text-align="justify" style:justify-single-word="false"/></style:style>#@br} +
+ %{<style:style style:name="P8" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:break-before="page"/></style:style>#@br} +
+ %{<style:style style:name="P9" style:family="paragraph" style:parent-style-name="Standard"><style:paragraph-properties fo:break-after="page"/></style:style>#@br} +
+ %{<style:style style:name="T1" style:family="text"><style:text-properties fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold"/></style:style>#@br} +
+ %{<style:style style:name="T2" style:family="text"><style:text-properties fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic"/></style:style>#@br} +
+ %{<style:style style:name="T3" style:family="text"><style:text-properties style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="font-color"/></style:style>#@br} +
+ %{<style:style style:name="T4" style:family="text"><style:text-properties style:text-position="super 58%"/></style:style>#@br} +
+ %{<style:style style:name="T5" style:family="text"><style:text-properties style:text-position="sub 58%"/></style:style>#@br} +
+ %{<style:style style:name="T6" style:family="text"><style:text-properties style:font-name="Courier 10 Pitch" fo:font-size="12pt"/></style:style>#@br} +
+ %{<style:style style:name="fr1" style:family="graphic" style:parent-style-name="Graphics"><style:graphic-properties style:wrap="none" style:horizontal-pos="left" style:horizontal-rel="paragraph" style:mirror="none" fo:clip="rect(0cm 0cm 0cm 0cm)" draw:luminance="0%" draw:contrast="0%" draw:red="0%" draw:green="0%" draw:blue="0%" draw:gamma="100%" draw:color-inversion="false" draw:image-opacity="100%" draw:color-mode="standard"/></style:style>#@br} +
+ %{<style:style style:name="gr1" style:family="graphic"><style:graphic-properties draw:stroke="none" draw:fill="none" draw:textarea-horizontal-align="center" draw:textarea-vertical-align="middle" draw:color-mode="standard" draw:luminance="0%" draw:contrast="0%" draw:gamma="100%" draw:red="0%" draw:green="0%" draw:blue="0%" fo:clip="rect(0cm 0cm 0cm 0cm)" draw:image-opacity="100%" style:mirror="none" style:run-through="background" style:wrap="none" style:vertical-pos="top" style:vertical-rel="baseline" style:horizontal-pos="left" style:horizontal-rel="paragraph" draw:wrap-influence-on-position="once-concurrent" style:flow-with-text="false"/></style:style>#@br} +
+ %{<style:style style:name="gr2" style:family="graphic"><style:graphic-properties draw:stroke="none" draw:fill="none" draw:textarea-horizontal-align="center" draw:textarea-vertical-align="middle" draw:color-mode="standard" draw:luminance="0%" draw:contrast="0%" draw:gamma="100%" draw:red="0%" draw:green="0%" draw:blue="0%" fo:clip="rect(0cm 0cm 0cm 0cm)" draw:image-opacity="100%" style:mirror="none" style:run-through="background" style:wrap="none" style:vertical-pos="middle" style:vertical-rel="baseline" style:horizontal-pos="left" style:horizontal-rel="paragraph" draw:wrap-influence-on-position="once-concurrent" style:flow-with-text="false"/></style:style>#@br} +
+ %{</office:automatic-styles>#@br} +
+ %{<office:body>#@br} +
+ %{<office:text><office:forms form:automatic-focus="false" form:apply-design-mode="false"/>#@br} +
+ %{<text:sequence-decls><text:sequence-decl text:display-outline-level="0" text:name="Illustration"/><text:sequence-decl text:display-outline-level="0" text:name="Table"/><text:sequence-decl text:display-outline-level="0" text:name="Text"/><text:sequence-decl text:display-outline-level="0" text:name="Drawing"/></text:sequence-decls>}
+ end
+ def post # placeholder: the body is empty, so calling it does nothing and returns nil
+ end
+ def publish
+ divider='='
+ content=[]
+ data=@data
+ content << @@odf[:open]
+ content << @@odf[:head]
+ content << @@odf[:body]
+ content << @@odf[:metadata]
+ content << @@odf[:owner_details] if @md.stmp =~/\w+/ #not used?
+ content << @@odf[:tail]
+ Output.new(content,@md).odf
+ @@odf[:head],@@odf[:body],@@odf[:tail],@@odf[:metadata]=[],[],[],[]
+ end
+ end
+ class Output <Source
+ include SiSU_Param
+ include SiSU_Env
+ def initialize(content,md)
+ @content,@md=content,md
+ @env=SiSU_Env::Info_env.new(@md.fns)
+ end
+ def odf #%odf output
+ SiSU_Env::SiSU_file.new(@md).mkdir
+ #filename_odf=SiSU_Env::SiSU_file.new(@md,'content.xml').mkfile
+ filename="#{@env.path.odf}/content.xml"
+ od=File.new(filename,'w+')
+ @content.each do |para| # this is a hack
+ od.puts para unless para =~/\A\s*\Z/
+ end
+ od.close
+ opendoc=@md.fn[:odf]
+ system("
+ cd #{@env.path.odf}
+ zip -qr #{opendoc} *
+ mv #{opendoc} #{@env.path.output}/#{@md.fnb}/.
+ cd #{Dir.pwd}
+ ")
+ end
+ end
+ end
+end
+__END__
+todo:
+* table of contents
+* page header/footer?
+
+done:
+headings *
+footnotes *
+bold underscore italics strikethrough *
+superscript subscript *
+extended ascii set *
+indents *
+ [autonomy_markup0.sst] *
+groups
+ poem *
+ code *
+ tables
+images
+bullet
+line break
+page break
+
+notes?? [you could add a note number for every object/paragraph!]
+
+ cd(@env.path.odf)
+ structure=[]
+ Find.find(@env.path.odf) do |f|
+ structure << puts f
+ end
+ open(opendoc,'wb') do |f|
+ zip=Zlib::ZipWriter.new(f)
+ structure.each |z| do
+ zip << z
+ end
+ zip.close
+ end
+ #zip -qr #{opendoc} *
+ mv(opendoc,"#{@env.path.output}/#{@md.fnb}/.")
+ cd(Dir.pwd)