# coding: utf-8 =begin * Name: SiSU * Description: a framework for document structuring, publishing and search * Author: Ralph Amissah * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Ralph Amissah All Rights Reserved. * License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be available at these locations: * SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system * Hompages: * Download: * Ralph Amissah ** Description: plaintext text generation, stripped plaintext output (unix, linefeed) =end module SiSU_Plaintext require "#{SiSU_lib}/dal" require "#{SiSU_lib}/sysenv" include SiSU_Env include SiSU_Param include SiSU_Viz require "#{SiSU_lib}/plaintext_format" include Format require "#{SiSU_lib}/shared_txt" require "#{SiSU_lib}/shared_structure" pwd=Dir.pwd @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 @@tablefoot='' class Source def initialize(opt) @opt=opt @@dostype=if @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/ if @opt.mod.inspect =~ /--footnote/ \ and @opt.mod.inspect =~ /--dos/ 'msdos footnotes' elsif @opt.mod.inspect =~ /--endnote/ \ and @opt.mod.inspect =~ /--dos/ 'msdos endnotes' elsif @opt.mod.inspect =~ /--footnote/ 'unix footnotes' elsif @opt.mod.inspect =~ /--endnote/ 'unix endnotes' else 'unix footnotes' end else puts "#{sf} not a processed file type" end end def read begin @md=SiSU_Param::Parameters.new(@opt).get @env=SiSU_Env::Info_env.new(@opt.fns) path=@env.path.output_tell tool=if @opt.cmd =~/[MVv]/; "#{@env.program.text_editor} #{path}/#{@md.fnb}/#{@md.fn[:plain]}" else '' end tell=SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',tool) tell.green_hi_blue unless @opt.cmd =~/q/ tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:plain]}") tell.flow if @opt.cmd =~/[MV]/ my_make=SiSU_Env::Create_file.new(@opt.cmd,@opt.fns) @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here SiSU_Plaintext::Source::Scroll.new(@dal_array,@md).songsheet SiSU_Env::Info_skin.new(@md).select #watch rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error ensure end end private class Scroll [],:end=>[] } @@dp=nil def 
# NOTE(review): this section (the rest of class Scroll, class Output and the
# __END__ data) is left token-for-token as found. It looks like the residue of
# a markup-stripping, line-collapsing extraction: text between a '<' and the
# following '>' is missing in several statements (for example
# `wrap.gsub!(... , < 78`, `para =~//`, `para.gsub!(//,' ')` and
# `class Output 0 para.each`), and '#' comments now run into code that
# presumably sat on later lines. Restore from a pristine copy before changing
# behaviour.
#
# What the visible code shows:
# * initialize(data,md) -- its `def` keyword ends the previous line -- keeps
#   data and md, builds @regx for "level marker, text, object id" paragraphs,
#   and sets @br to "\r\n" when md.mod mentions --dos together with --footnote
#   or --endnote, otherwise "\n"; @@dostype is set alongside.
# * songsheet runs markup(@data) and passes the result to publish.
# * extract_endnotes scans a paragraph for endnote spans, splits them on line
#   break marks and wraps them via SiSU_text_utils::Wrap; the rest of the
#   method is missing.
# * markup rewrites each paragraph in place with gsub! (font attributes,
#   links, escaped characters, breaks, object ids) and returns @plaintext.
# * publish concatenates open, head, body, optional endnotes, metadata and
#   tail, hands them to Output.new(content,@md).plaintext and resets
#   @@endnotes.
initialize(data,md) @data,@md=data,md @url_brace=SiSU_Viz::Skin.new.url_decoration @vz=SiSU_Env::Get_init.instance.skin @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m # 2004w18 pb pn removal added @tab="\t" @br=if md.mod.inspect =~ /--footnote/ \ and md.mod.inspect =~ /--dos/ @@dostype='msdos footnotes' "\r\n" elsif md.mod.inspect =~ /--endnote/ \ and md.mod.inspect =~ /--dos/ @@dostype='msdos endnotes' "\r\n" elsif md.mod.inspect =~ /--footnote/ @@dostype='unix footnotes' "\n" elsif md.mod.inspect =~ /--endnote/ @@dostype='unix endnotes' "\n" else @@dostype='unix footnotes' "\n" end @plaintext={ :body=>[],:open=>[],:close=>[],:head=>[],:metadata=>[],:tail=>[] } end def songsheet plaintext=markup(@data) publish(plaintext) end # Used for extraction of endnotes from paragraphs def extract_endnotes(para='') notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) @n=[] notes.flatten.each do |n| #high cost to deal with
 appropriately within plaintext, consider n=n.dup.to_s if n =~/#{Mx[:br_line]}|#{Mx[:br_nl]}/ fix = n.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/) #watch #added fix.each do |x| unless x.empty?; @n << x end end else @n << n end end notes=@n.flatten notes.each do |e| util=if e.to_s =~/^\[[\d*+]+\]:/; SiSU_text_utils::Wrap.new(e.to_s,78,4,1) else SiSU_text_utils::Wrap.new(e.to_s,78,1,1) end wrap=util.line_wrap if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m, < 78 @plaintext[:body] << case lv when 1; wrapped.upcase << @br << '*'*times << @br when 2..3; wrapped.upcase << @br << '='*times << @br when 4; wrapped.upcase << @br << '-'*times << @br when 5..6; wrapped.upcase << @br << '.'*times << @br end else @plaintext[:body] << wrapped << @br # main text, contents, body KEEP end if @@endnotes[:para] \ and @@dostype =~/footnote/ #edit out to switch off endnotes following paragraph to which they belong @plaintext[:body] << @br @@endnotes[:para].each {|e| @plaintext[:body] << e << @br} elsif @@endnotes[:para] \ and @@dostype =~/endnote/ @plaintext[:body] << @br*2 end @@endnotes[:para]=[] end def markup(data) # Used for major markup instructions dir=SiSU_Env::Info_env.new(@md.fns) @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]} (0..6).each { |x| @cont[x]=@level[x]=false } (4..6).each { |x| @plaintext_contents_close[x]='' } plaintext_tail #($1,$2) table_message='[table omitted, see other document formats]' fix=[] data.each do |para| para.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#@br#{table_message}") para.gsub!(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,'') # remove dummy headings (used by html) #check para.gsub!(/#{Mx[:gl_bullet]}\s*/,'* ') # bullet markup, marked down para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*\1*') para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/\1/') para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'[\1]') 
para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_\1_') para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^\1^') para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+\1+') para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"\1"') para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'-\1-') unless para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ para.gsub!(/\{(.+?)\}((?:https?|file|ftp):\/\/\S+|image)/,'\1 [link:] \2') para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") para.gsub!(/_((?:https?|file|ftp):\/\/\S+)/,'\1') extract_endnotes(para) para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]') # endnote marker marked up para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]') # endnote marker marked up para.gsub!(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<') para.gsub!(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>') para.gsub!(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&') para.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!') para.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#') para.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*') para.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-') para.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/') para.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_') para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{') para.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}') para.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~') para.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©') end if para =~/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/ ##{Mx[:gr_o]}codeline#{Mx[:gr_c]} if para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code-block: angle brackets special characters para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _< para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_< end para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n") # watch 
# The first substitution on the next line deletes the group/verse/alt/code
# open and "-end" markers together with any object-id suffix following them.
para.gsub!(/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,'') else para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n") # watch introduces a bug end para.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'') # remove page breaks para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') # remove empty lines - check para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') para.gsub!(/(.+?)<\/a>/m,'\1') para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links para.gsub!(/ /,' ') # decide on para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') #para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') wordlist=para.scan(/\S+/) if para =~/^#{Rx[:meta]}\s*(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; plaintext_metadata(d_meta) end end if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change paranum=para[@regx,3] @p_num=Format::Paragraph_number.new(paranum) end @sto=SiSU_Structure::Split_text_object.new(@md,para).txt ### problem in scroll, it appears tables are getting paragraph numbers m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ if para =~m \ and para=~/\S+/ para=case @sto.format when /^(1):(\S*?)/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body1 when /^(2):(\S*?)/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body2 when /^(3):(\S*?)/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body3 when /^(4):(\S+?)/ # work on see 
SiSU_text_parts::Split_text_object plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body4 when /^(5):(\S*?)/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body5 when /^(6):(\S*?)/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body6 #when /^(i1)$/ # #formatMono.gsubBody # #para=@sto[:lev_para_ocn].scrIndent1 #when /^(i2)$/ # formatMono.gsubBody # para=@sto[:lev_para_ocn].scrIndent2 #when /^(center)$/ # para.gsub!(/(.+)/, # %{
(\\1)
}) # para=@sto[:lev_para_ocn].scrPara #when /^(b|bold)$/ # para.gsub!(/(.+)/, # %{(\\1)}) # para=@sto[:lev_para_ocn].scrPara #when /null/ # see whether u can improve # if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/) # #formatMono.gsubBody # #para=@sto[:lev_para_ocn].scrPara # end else plaintext_structure(para,nil,nil,nil) #watch may be problematic para end elsif para =~/#{table_message}/ @plaintext[:body] << para << @br elsif para =~/(Note|Endnotes?)/ \ and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ elsif para =~/(MetaData)/ \ and para =~/#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info ####suspect visit #formatMono=MonoSiSU.new('
MetaData') #para=formatMono.bold_para elsif para.include? 'Owner Details' \ and para !~/#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #formatMono=MonoSiSU.new('
Owner Details') #@plaintext[:owner_details]=formatMono.bold_para #para='' elsif para =~/(#{Mx[:tc_p]}|#{Mx[:gr_o]}Th?)/u #tables ! check elsif para =~/(.*)(.*)/ one,two=$1,$2 format_text=Format_text_object.new(one,two) para=format_text.seg_no_paranum end para='' if (para =~// \ and para =~/^(-\{{2}~\d+|)/) # -endnote case para when /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/ if para =~/.*<:#>.*$/m format_text=Format_text_object.new(para,'') para=format_text.scr_indent_one_no_paranum end end if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ # i don't get the condition for no paranum end if para =~/<:center>/ one,two=/(.*)<:center>(.*)/.match(para)[1,2] format_text=Format_text_object.new(one,two) para=format_text.center end para.gsub!(/#{Mx[:id_o]}.+?#{Mx[:id_c]}/,' ') if para ## Clean Prepared Text para.gsub!(//,' ') if para ## Clean Prepared Text para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text end end @plaintext end def publish(plaintext) divider='=' content=[] content << plaintext[:open] content << plaintext[:head] content << plaintext[:body] content << @@endnotes[:end] if @@dostype =~/endnotes/ content << "#@br#{divider*78}#@br" content << plaintext[:metadata] content << "#@br#{divider*78}#@br" if @md.stmp =~/\w+/ #not used? content << plaintext[:owner_details] if @md.stmp =~/\w+/ #not used? content << plaintext[:tail] Output.new(content,@md).plaintext @@endnotes={ :para=>[],:end=>[] } end end class Output 0 para.each do |line| line.gsub!(/\s+$/m,'') file_plaintext.puts line #unix plaintext end else file_plaintext.puts para #unix plaintext # /^([*=-]|\.){5}/ end end file_plaintext.close end end end end __END__ !\|#\|&*\|-\|/\|_\|{\|}\|~\|&#