# encoding: utf-8 =begin * Name: SiSU ** Description: documents, structuring, processing, publishing, search *** plaintext text generation, stripped plaintext output (unix, linefeed) ** Author: Ralph Amissah [ralph@amissah.com] [ralph.amissah@gmail.com] ** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Ralph Amissah, All Rights Reserved. ** License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see [http://www.gnu.org/licenses/]. 
If you have Internet connection, the latest version of the GPL should be available at these locations: [http://www.fsf.org/licensing/licenses/gpl.html] [http://www.gnu.org/licenses/gpl.html] ** SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system ** Hompages: [http://www.jus.uio.no/sisu] [http://www.sisudoc.org] ** Git [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/txt.rb;hb=HEAD] =end module SiSU_Txt_Plain require_relative 'ao' # ao.rb require_relative 'se' # se.rb include SiSU_Env require_relative 'shared_metadata' # shared_metadata.rb require_relative 'generic_parts' # generic_parts.rb require_relative 'txt_read' # txt_read.rb require_relative 'txt_shared' # txt_shared.rb require_relative 'txt_plain_decorate' # txt_plain_decorate.rb require_relative 'txt_output' # txt_output.rb include SiSU_Param @@alt_id_count,@@alt_id_count=0,0 @@tablefoot='' class Source include SiSU_Txt_Read def initialize(opt) @opt=opt unless @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/ puts "#{sf} not a processed file type" end end def read begin md=SiSU_Param::Parameters.new(@opt).get specific={ description: 'Plaintext (utf-8)', output_path: md.file.output_path.txt.dir, output_file: md.file.base_filename.txt, } read_generic(@opt,specific) SiSU_Txt_Plain::Source::Scroll.new(md,@ao_array,@wrap_width).songsheet rescue SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do __LINE__.to_s + ':' + __FILE__ end ensure end end private class Scroll appropriately within plaintext, consider n=n.dup.to_s if n =~/#{Mx[:br_line]}|#{Mx[:br_nl]}/ fix = n.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/) #watch #added fix.each do |x| unless x.empty?; @n << x end end else @n << n end end notes=@n.flatten notes.each do |e| util=(e.to_s =~/^\[[\d*+]+\]:/) \ ? 
# NOTE(review): this copy of the file looks flattened by a text-extraction step: newlines are collapsed and
# text that sat between '<' and '>' is missing (the "class Scroll" header above runs straight into the middle
# of the endnote routine). Restore the file from the upstream repository before changing any code.
# Module SiSU_Txt_Plain: plaintext (utf-8) output generation.
# NOTE(review): `@@alt_id_count,@@alt_id_count=0,0` assigns the same class variable twice.
# Source#initialize(opt): stores opt and prints a warning when opt.fns does not match the .sst pattern.
# NOTE(review): the warning interpolates `sf`, which is not defined in the visible code - presumably
# @opt.fns was intended; confirm.
# Source#read: builds document parameters from @opt, calls read_generic with the plaintext output path and
# file name, then runs Scroll#songsheet; the bare rescue hands the error to SiSU_Errors::Rescued.
# Endnote routine (head missing): each note is split on the br_line/br_nl markers, the non-empty parts are
# collected in @n, and each entry is wrapped - Wrap.new(...,4,1) when it starts with "[n]:", else
# Wrap.new(...,1,1). Number-led notes are rewritten as "[n]: text"; every wrapped note is appended to
# @@endnotes[:para] (prefixed with "-") and to @@endnotes[:end], and @@endnotes is returned.
# plaintext_metadata: wraps each "tag: info" line from SiSU_Metadata::Summary via Wrap.new(inf,@wrap_width,15,1);
# the rest of the method is missing and fuses into the heading branch of what appears to be plaintext_structure.
# Heading branch: levels 0-7 emit the upcased wrapped text, a break_line, then heading_underscore.lN*times
# plus p_num; level 4 is skipped when dob.use_ == :dummy. Pending @@endnotes[:para] entries are appended
# to the body unless @@endnotes_ is set, and the list is then cleared.
(SiSU_TextUtils::Wrap.new(e.to_s,@wrap_width,4,1)) : (SiSU_TextUtils::Wrap.new(e.to_s,@wrap_width,1,1)) wrap=util.line_wrap wrap=if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m wrap.gsub(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m, <<-GSUB \\1[\\2]: \\3 GSUB ) else wrap.gsub(/^(.+)\Z/m, <<-GSUB \\1 GSUB ) end @@endnotes[:para] << "-#{wrap}" @@endnotes[:end] << '' << wrap end @@endnotes end def plaintext_metadata array=SiSU_Metadata::Summary.new(@md).plaintext.metadata array.each do |meta| tag,inf=meta.scan(/^.+?:\s|.+/) if tag and inf util=SiSU_TextUtils::Wrap.new(inf,@wrap_width,15,1) txt=util.line_wrap @plaintext[:metadata] <<< @wrap_width @plaintext[:body] << case lv when 0 then wrapped.upcase << break_line << decorate.heading_underscore.l0*times + p_num << break_line*2 when 1 then wrapped.upcase << break_line << decorate.heading_underscore.l1*times + p_num << break_line*2 when 2 then wrapped.upcase << break_line << decorate.heading_underscore.l2*times + p_num << break_line*2 when 3 then wrapped.upcase << break_line << decorate.heading_underscore.l3*times + p_num << break_line*2 when 4 unless dob.use_ == :dummy wrapped.upcase << break_line << decorate.heading_underscore.l4*times + p_num << break_line*2 end when 5 then wrapped.upcase << break_line << decorate.heading_underscore.l5*times + p_num << break_line*2 when 6 then wrapped.upcase << break_line << decorate.heading_underscore.l6*times + p_num << break_line*2 when 7 wrapped.upcase << break_line << decorate.heading_underscore.l7*times + p_num << break_line*2 #when 7 then wrapped.upcase << break_line << decorate.heading_underscore.l7*times + p_num << break_line*2 end else @plaintext[:body] << wrapped + p_num << break_line # main text, contents, body KEEP end if @@endnotes[:para] \ and not @@endnotes_ @@endnotes[:para].each {|e| @plaintext[:body] << e << break_line} elsif @@endnotes[:para] \ and @@endnotes_ end @@endnotes[:para]=[] end def ocn_display(dob) make=SiSU_Env::ProcessingSettings.new(@md) if make.build.plaintext_ocn? 
# ocn_display(dob): returns "\n" + Dx[:ocn_o] + dob.ocn + Dx[:ocn_c] when make.build.plaintext_ocn? is true
# and dob.ocn is a Fixnum; returns '' in every other case.
# NOTE(review): Fixnum was deprecated in Ruby 2.4 and removed in Ruby 3.2 (Integer is the replacement), so
# the is_a?(Fixnum) test below raises NameError on current interpreters.
# NOTE(review): the inner `(defined? dob.ocn) ? ... : ''` repeats the defined? test of the enclosing `if`.
# markup(data): resets the per-run arrays, runs plaintext_tail and plaintext_metadata, then rewrites each
# object in data. Table bodies are replaced by the "[table omitted ...]" message, dummy headings are
# removed, and inline markers (bold, italics, underscore, subscript, superscript, insert, cite, strike,
# monospace) are replaced by the decorate.* open/close strings. For objects that are not :code, links are
# reduced to their text (plus "[link: <url>]" or "[link: local image]").
if defined? dob.ocn \ and dob.ocn.is_a?(Fixnum) (defined? dob.ocn) \ ? "\n#{Dx[:ocn_o]}#{dob.ocn}#{Dx[:ocn_c]}" \ : '' else '' end else '' end end def markup(data) # Used for major markup instructions SiSU_Env::InfoEnv.new(@md.fns) @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]} (0..7).each { |x| @cont[x]=@level[x]=false } (4..7).each { |x| @plaintext_contents_close[x]='' } plaintext_tail #($1,$2) plaintext_metadata table_message='[table omitted, see other document formats]' data.each do |dob| dob.obj=dob.obj.gsub(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#{break_line}#{table_message}"). #fix gsub(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,''). # remove dummy headings (used by html) #check also [~-]# gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/, "#{decorate.bold.open}\\1#{decorate.bold.close}"). gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/, "#{decorate.italics.open}\\1#{decorate.italics.close}"). gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/, "#{decorate.underscore.open}\\1#{decorate.underscore.close}"). gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/, "#{decorate.subscript.open}\\1#{decorate.subscript.close}"). gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/, "#{decorate.superscript.open}\\1#{decorate.superscript.close}"). gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/, "#{decorate.insert.open}\\1#{decorate.insert.close}"). gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/, "#{decorate.cite.open}\\1#{decorate.cite.close}"). gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/, "#{decorate.strike.open}\\1#{decorate.strike.close}"). gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/, "#{decorate.monospace.open}\\1#{decorate.monospace.close}") unless dob.is==:code dob.obj=dob.obj.gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/,'\1'). gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1'). gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1 [link: <\2>]'). 
# (markup, non-code objects continued) bare urls are wrapped in the_text.url_open/url_close, endnotes are
# collected by extract_endnotes(dob) and replaced in the text by "[^n]" markers, and escaped glyph codes
# (#060, #062, #038, #033, ...) are turned back into their literal characters.
# After that block: line-break markers become break_line (once for :block objects, twice otherwise), and
# "_<" / "_>" lose their underscore in :code objects.
# NOTE(review): `gsub(/(.+?)<\/a>/m,...)` below appears to have lost its opening-tag pattern in extraction.
gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/,'\1 [link: local image]'). gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,"#{the_text.url_open}\\1#{the_text.url_close}") extract_endnotes(dob) dob.obj=dob.obj.gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]'). # endnote marker marked up gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]'). # endnote marker marked up gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<'). gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>'). gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&'). gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'). gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#'). gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*'). gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'). gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/'). gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_'). gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'). gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}'). gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~'). gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©'). gsub(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'\\') end dob.obj=if dob.of==:block # watch dob.obj.gsub(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/m,"* "). gsub(/\n?#{Mx[:br_line]}\n?|\n?#{Mx[:br_nl]}\n?/m,break_line) else dob.obj.gsub(/\n?#{Mx[:br_line]}\n?|\n?#{Mx[:br_nl]}\n?/m,break_line*2) end if dob.is==:code dob.obj=dob.obj.gsub(/(^|[^}])_([<>])/m,'\1\2'). # _> _< gsub(/(^|[^}])_([<>])/m,'\1\2') # _<_< end dob.obj=dob.obj.gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1'). gsub(/(.+?)<\/a>/m,'\1'). gsub(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,''). # remove name links gsub(/ |#{Mx[:nbsp]}/,' '). # decide on gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [ \1 ]'). #"[ #{dir.url.images_local}\/\\1 ]") gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,' [ \1 ]'). 
# (markup continued) objects that are not metadata, end-of-file or endnote markers are appended to
# @plaintext[:body]: headings and paragraphs go through plaintext_structure; group/block/verse/code/table
# objects are appended as-is with the ocn suffix; page breaks become a rule of 40 "-" characters and
# object breaks become "* * *" indented by 20 spaces.
# NOTE(review): the two `//` regexes below look like remnants of patterns lost in extraction. As written,
# `dob=''` replaces the object with a String, `if dob` is then always true, and `dob.obj=` would raise
# NoMethodError - verify against the upstream source.
# publish(plaintext): assembles open, head and body, adds @@endnotes[:end] when @@endnotes_ is set, then a
# "=" rule of @wrap_width characters, the metadata, and a second rule only when @md.stmp contains a word
# character.
gsub(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') if dob.obj !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ p_num=ocn_display(dob) if dob.is==:heading \ or dob.is==:para plaintext_structure(dob,p_num) elsif dob.is==:group \ or dob.is==:block \ or dob.is==:verse \ or dob.is==:code \ or dob.is==:table @plaintext[:body] << dob.obj + p_num << break_line elsif dob.is==:break sp=' ' ln='-' @plaintext[:body] <<=if dob.obj==Mx[:br_page] \ or dob.obj==Mx[:br_page_new] \ or dob.obj==Mx[:br_page_line] "#{break_line}#{ln*40}#{break_line*2}" elsif dob.obj ==Mx[:br_obj] "#{break_line}#{sp*20}* * *#{break_line*2}" end # following empty line (break_line) missing, fix end dob='' if (dob.obj =~// \ and dob.obj =~/^(-\{{2}~\d+|)/) # -endnote if dob ## Clean Prepared Text dob.obj=dob.obj.gsub(//,' '). gsub(/<:\S+>/,' ') end end end @plaintext end def publish(plaintext) divider='=' content=[] content << plaintext[:open] content << plaintext[:head] content << plaintext[:body] content << @@endnotes[:end] if @@endnotes_ content << "#{break_line}#{divider*@wrap_width}#{break_line}" content << plaintext[:metadata] content << "#{break_line}#{divider*@wrap_width}#{break_line}" if @md.stmp =~/\w+/ #not used? 
# (publish continued) the tail is appended, the content is written through Txt_Output::Output to the txt
# file obtained from SiSU_Env::FileOp, and the class-level @@endnotes store is reset to empty lists.
# Everything after __END__ is not parsed as code; it reads as a reference list of decoration markers.
content << plaintext[:tail] outputfile=SiSU_Env::FileOp.new(@md).write_file.txt Txt_Output::Output.new.document(content,outputfile) @@endnotes={ para: [], end: [] } end end end end __END__ bold_o: '*', bold_c: '*', #bold_o: '!', bold_c: '!', #emphasis_o: '*', emphasis_c: '*', italics_o: '/', italics_c: '/', underscore_o: '_', underscore_c: '_', cite_o: '"', cite_c: '"', insert_o: '+', insert_c: '+', strike_o: '-', strike_c: '-', superscript_o: '^', superscript_c: '^', subscript_o: '[', subscript_c: ']', hilite_o: '*', hilite_c: '*', monospace_o: '', monospace_c: '', p_bold_o: '!{', p_bold_c: '}!', p_italics_o: '/{', p_italics_c: '}/', p_underscore_o: '_{', p_underscore_c: '}_', p_cite_o: '"{', p_cite_c: '}"', p_insert_o: '+{', p_insert_c: '}+', p_strike_o: '-{', p_strike_c: '}-', p_superscript_o: '^{', p_superscript_c: '}^', p_subscript_o: ',{', p_subscript_c: '},', p_hilite_o: '*{', p_hilite_c: '}*', p_monospace_o: '#{', p_monospace_c: '}#',