aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v2/plaintext.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v2/plaintext.rb')
-rw-r--r--lib/sisu/v2/plaintext.rb419
1 files changed, 0 insertions, 419 deletions
diff --git a/lib/sisu/v2/plaintext.rb b/lib/sisu/v2/plaintext.rb
deleted file mode 100644
index ba146978..00000000
--- a/lib/sisu/v2/plaintext.rb
+++ /dev/null
@@ -1,419 +0,0 @@
-# coding: utf-8
-=begin
-
- * Name: SiSU
-
- * Description: a framework for document structuring, publishing and search
-
- * Author: Ralph Amissah
-
- * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.
-
- * License: GPL 3 or later:
-
- SiSU, a framework for document structuring, publishing and search
-
- Copyright (C) Ralph Amissah
-
- This program is free software: you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the Free
- Software Foundation, either version 3 of the License, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program. If not, see <http://www.gnu.org/licenses/>.
-
- If you have Internet connection, the latest version of the GPL should be
- available at these locations:
- <http://www.fsf.org/licensing/licenses/gpl.html>
- <http://www.gnu.org/licenses/gpl.html>
-
- <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
- <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
- <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
-
- * SiSU uses:
- * Standard SiSU markup syntax,
- * Standard SiSU meta-markup syntax, and the
- * Standard SiSU object citation numbering and system
-
- * Hompages:
- <http://www.jus.uio.no/sisu>
- <http://www.sisudoc.org>
-
- * Download:
- <http://www.jus.uio.no/sisu/SiSU/download.html>
-
- * Ralph Amissah
- <ralph@amissah.com>
- <ralph.amissah@gmail.com>
-
- ** Description: plaintext text generation, stripped plaintext output (unix,
- linefeed)
-
-=end
-module SiSU_Plaintext
- require "#{SiSU_lib}/dal" # dal.rb
- require "#{SiSU_lib}/sysenv" # sysenv.rb
- include SiSU_Env
- require "#{SiSU_lib}/plaintext_format" # plaintext_format.rb
- include SiSU_Plaintext_format
- require "#{SiSU_lib}/shared_metadata" # shared_metadata.rb
- require "#{SiSU_lib}/shared_txt" # shared_txt.rb
- include SiSU_Param
- include SiSU_Viz
- pwd=Dir.pwd
- @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0
- @@tablefoot=''
- class Source
- def initialize(opt)
- @opt=opt
- unless @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/
- puts "#{sf} not a processed file type"
- end
- end
- def read
- begin
- md=SiSU_Param::Parameters.new(@opt).get
- env=SiSU_Env::Info_env.new(@opt.fns)
- unless @opt.cmd =~/q/
- path=env.path.output_tell
- tool=(@opt.cmd =~/[MVv]/) \
- ? "#{env.program.text_editor} #{path}/#{md.fnb}/#{md.fn[:plain]}" \
- : @opt.fns
- @opt.cmd=~/[MVvz]/ \
- ? SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',tool).green_hi_blue \
- : SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',tool).green_title_hi
- SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{path}/#{md.fnb}/#{md.fn[:plain]}").flow if @opt.cmd =~/[MV]/
- end
- my_make=SiSU_Env::Create_file.new(@opt.cmd,@opt.fns)
- dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
- wrap_width=if defined? md.make.plaintext_wrap \
- and md.make.plaintext_wrap
- md.make.plaintext_wrap
- elsif defined? env.plaintext_wrap \
- and env.plaintext_wrap
- env.plaintext_wrap
- else 78
- end
- #wrap_width=(defined? md.make.plaintext_wrap) ? md.make.plaintext_wrap : 78
- SiSU_Plaintext::Source::Scroll.new(dal_array,md,wrap_width).songsheet
- SiSU_Env::Info_skin.new(md).select #watch
- rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
- ensure
- end
- end
- private
- class Scroll <Source
- require "#{SiSU_lib}/defaults" # defaults.rb
- require "#{SiSU_lib}/shared_txt" # shared_txt.rb
- include SiSU_text_utils
- @@endnotes={ :para=>[],:end=>[] }
- def initialize(data,md,wrap_width)
- @data,@md,@wrap_width=data,md,wrap_width
- @brace_url=SiSU_Viz::Skin.new.url_decoration
- @tab="\t"
- @@endnotes_=case md.mod.inspect
- when /--footnote/; false
- when /--endnote/; true
- else true
- end
- @br=case md.mod.inspect
- when /--dos/; "\r\n"
- when /--unix/; "\n"
- else "\n"
- end
- @plaintext={ :body=>[],:open=>[],:close=>[],:head=>[],:metadata=>[],:tail=>[] }
- end
- def songsheet
- plaintext=markup(@data)
- publish(plaintext)
- end
- # Used for extraction of endnotes from paragraphs
- def extract_endnotes(dob='')
- notes=dob.obj.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/)
- @n=[]
- notes.flatten.each do |n| #high cost to deal with <br> appropriately within plaintext, consider
- n=n.dup.to_s
- if n =~/#{Mx[:br_line]}|#{Mx[:br_nl]}/
- fix = n.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/) #watch #added
- fix.each do |x|
- unless x.empty?; @n << x
- end
- end
- else @n << n
- end
- end
- notes=@n.flatten
- notes.each do |e|
- util=(e.to_s =~/^\[[\d*+]+\]:/) \
- ? (SiSU_text_utils::Wrap.new(e.to_s,@wrap_width,4,1)) \
- : (SiSU_text_utils::Wrap.new(e.to_s,@wrap_width,1,1))
- wrap=util.line_wrap
- if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m
- wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m, <<GSUB
-\\1[\\2]: \\3
-GSUB
- )
- else
- wrap.gsub!(/^(.+)\Z/m, <<GSUB
-\\1
-GSUB
- )
- end
- @@endnotes[:para] << "-#{wrap}"
- @@endnotes[:end] << '' << wrap
- end
- @@endnotes
- end
- def plaintext_metadata
- array=Metadata::Summary.new(@md).plaintext.metadata
- array.each do |meta|
- tag,inf=meta.scan(/^.+?:\s|.+/)
- if tag and inf
- util=SiSU_text_utils::Wrap.new(inf,@wrap_width,15,1)
- txt=util.line_wrap
- @plaintext[:metadata] <<<<WOK
-
-#{@tab}#{tag}#{txt}
-WOK
- end
- end
- end
- def plaintext_tail
- SiSU_Env::Info_skin.new(@md).select
- env=SiSU_Env::Info_env.new(@md.fns)
- vz=SiSU_Env::Get_init.instance.skin
- base_url="#{env.url.root}/#{@md.fnb}"
- generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version]
- lastdone="Last Generated on: #{Time.now}"
- rubyv="Ruby version: #{@md.ruby_version}"
- sc=if @md.sc_info
- "Source file: #{@md.sc_filename}#{@br}Version number: #{@md.sc_number}#{@br}Version date: #{@md.sc_date}#{@br}"
- else ''
- end
- @plaintext[:tail] <<<<WOK
-#{@br}
-plaintext (plain text):
- #{base_url}/#{@md.fn[:plain]}#{@br}
-Other versions of this document: #{@br}
-manifest:
- #{base_url}/#{@md.fn[:manifest]}#{@br}
-at:
- #{env.url.root}#{@br}
-
-#{sc}
-* #{generator}
-* #{rubyv}
-* #{lastdone}
-* SiSU #{vz.url_sisu}
-WOK
- end
- def plaintext_structure(dob='') #% Used to extract the structure of a document
- lv=n=n3=nil
- if dob.is=='heading'
- lv=dob.ln
- n=lv - 1
- n3=lv + 2
- end
- util=nil
- wrapped=if dob.is =='para' \
- or dob.is=='heading'
- if dob.is=='para'
- if dob.indent =~/[1-9]/
- util=if dob.bullet_
- SiSU_text_utils::Wrap.new("* #{dob.obj}",@wrap_width,dob.indent.to_i*2)
- else SiSU_text_utils::Wrap.new(dob.obj,@wrap_width,dob.indent.to_i*2)
- end
- else
- util=if dob.bullet_
- SiSU_text_utils::Wrap.new("* #{dob.obj}",@wrap_width,0)
- else SiSU_text_utils::Wrap.new(dob.obj,@wrap_width,0)
- end
- end
- else util=SiSU_text_utils::Wrap.new(dob.obj,@wrap_width,0)
- end
- util.line_wrap
- end
- if lv
- times=wrapped.length
- times=@wrap_width if times > @wrap_width
- @plaintext[:body] << case lv
- when 1; wrapped.upcase << @br << Px[:lv1]*times << @br
- when 2; wrapped.upcase << @br << Px[:lv2]*times << @br
- when 3; wrapped.upcase << @br << Px[:lv3]*times << @br
- #when 2..3; wrapped.upcase << @br << Px[:lv2_3]*times << @br
- when 4; wrapped.upcase << @br << Px[:lv4]*times << @br
- when 5; wrapped.upcase << @br << Px[:lv5]*times << @br
- when 6; wrapped.upcase << @br << Px[:lv6]*times << @br
- #when 5..6; wrapped.upcase << @br << Px[:lv5_6]*times << @br
- end
- else
- @plaintext[:body] << wrapped << @br # main text, contents, body KEEP
- end
- if @@endnotes[:para] \
- and not @@endnotes_
- @plaintext[:body] << @br
- @@endnotes[:para].each {|e| @plaintext[:body] << e << @br}
- elsif @@endnotes[:para] \
- and @@endnotes_
- @plaintext[:body] << @br*2
- end
- @@endnotes[:para]=[]
- end
- def markup(data) # Used for major markup instructions
- dir=SiSU_Env::Info_env.new(@md.fns)
- @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]}
- (0..6).each { |x| @cont[x]=@level[x]=false }
- (4..6).each { |x| @plaintext_contents_close[x]='' }
- plaintext_tail #($1,$2)
- plaintext_metadata
- table_message='[table omitted, see other document formats]'
- fix=[]
- data.each do |dob|
- dob.obj.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#{@br}#{table_message}") #fix
- dob.obj.gsub!(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,'') # remove dummy headings (used by html) #check also [~-]#
- dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,
- "#{Px[:bold_o]}\\1#{Px[:bold_c]}")
- dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,
- "#{Px[:italics_o]}\\1#{Px[:italics_c]}")
- dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,
- "#{Px[:underscore_o]}\\1#{Px[:underscore_c]}")
- dob.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,
- "#{Px[:subscript_o]}\\1#{Px[:subscript_c]}")
- dob.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,
- "#{Px[:superscript_o]}\\1#{Px[:superscript_c]}")
- dob.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,
- "#{Px[:insert_o]}\\1#{Px[:insert_c]}")
- dob.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,
- "#{Px[:cite_o]}\\1#{Px[:cite_c]}")
- dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,
- "#{Px[:strike_o]}\\1#{Px[:strike_c]}")
- dob.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,
- "#{Px[:monospace_o]}\\1#{Px[:monospace_c]}")
- unless dob.is=='code'
- dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/,'\1')
- dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1')
- dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1 [link: <\2>]')
- dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/,'\1 [link: local image]')
- dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,"#{@brace_url.txt_open}\\1#{@brace_url.txt_close}")
- extract_endnotes(dob)
- dob.obj.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]') # endnote marker marked up
- dob.obj.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]') # endnote marker marked up
- dob.obj.gsub!(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<')
- dob.obj.gsub!(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>')
- dob.obj.gsub!(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&')
- dob.obj.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!')
- dob.obj.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#')
- dob.obj.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*')
- dob.obj.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-')
- dob.obj.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/')
- dob.obj.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_')
- dob.obj.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{')
- dob.obj.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}')
- dob.obj.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~')
- dob.obj.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©')
- end
- if dob.of=='group' # watch
- dob.obj.gsub!(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/,"* ")
- dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n")
- else dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n")
- end
- if dob.is=='code'
- dob.obj.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _<
- dob.obj.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_<
- end
- dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1')
- dob.obj.gsub!(/<a href=".+?">(.+?)<\/a>/m,'\1')
- dob.obj.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links
- dob.obj.gsub!(/&nbsp;|#{Mx[:nbsp]}/,' ') # decide on
- dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]")
- dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,' [ \1 ]')
- dob.obj.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')
- wordlist=dob.obj.scan(/\S+/)
- if dob.obj !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/
- if defined? dob.ocn and dob.ocn.to_s =~/\d+/
- paranum=dob.ocn.to_s
- @p_num=SiSU_Plaintext_format::Paragraph_number.new(paranum)
- end
- if dob.is=='heading' \
- or dob.is=='para'
- plaintext_structure(dob)
- elsif dob.is=='group' \
- or dob.is=='verse' \
- or dob.is=='code' \
- or dob.is=='table'
- @plaintext[:body] << dob.obj << @br
- elsif dob.is=='break'
- sp=' '
- ln='-'
- @plaintext[:body] <<=if dob.obj==Mx[:br_page] \
- or dob.obj==Mx[:br_page_new]
- "#{@br}#{ln*40}#{@br*2}"
- elsif dob.obj ==Mx[:obj_ln_sep]
- "#{@br}#{sp*20}* * *#{@br*2}"
- end # following empty line (@br) missing, fix
- end
- dob='' if (dob.obj =~/<a name="n\d+">/ \
- and dob.obj =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote
- dob.obj.gsub!(/<!.+!>/,' ') if dob ## Clean Prepared Text
- dob.obj.gsub!(/<:\S+>/,' ') if dob ## Clean Prepared Text
- end
- end
- @plaintext
- end
- def publish(plaintext)
- divider='='
- content=[]
- content << plaintext[:open]
- content << plaintext[:head]
- content << plaintext[:body]
- content << @@endnotes[:end] if @@endnotes_
- content << "#{@br}#{divider*@wrap_width}#{@br}"
- content << plaintext[:metadata]
- content << "#{@br}#{divider*@wrap_width}#{@br}" if @md.stmp =~/\w+/ #not used?
- content << plaintext[:tail]
- Output.new(content,@md).plaintext
- @@endnotes={ :para=>[],:end=>[] }
- end
- end
- class Output <Source
- include SiSU_Param
- include SiSU_Env
- def initialize(content,md)
- @content,@md=content,md
- end
- def plaintext #%plaintext output
- SiSU_Env::SiSU_file.new(@md).mkdir
- file_plaintext=SiSU_Env::SiSU_file.new(@md,@md.fn[:plain]).mkfile
- @sisu=[]
- emptyline=0
- @content.each do |para| # this is a hack
- if para.class==Array \
- and para.length > 0
- para.each do |line|
- if line
- line.gsub!(/\s+$/m,'')
- line.gsub!(/^\A[ ]*\Z/m,'')
- if line=~/^\A[ ]*\Z/m
- emptyline+=1
- else emptyline=0
- end
- file_plaintext.puts line if emptyline < 2 #remove extra line spaces (fix upstream)
- end
- end
- else file_plaintext.puts para #unix plaintext # /^([*=-]|\.){5}/
- end
- end
- file_plaintext.close
- end
- end
- end
-end
-__END__
-&#033;\|&#035;\|&&#042;\|&#045;\|&#047;\|&#095;\|&#123;\|&#125;\|&#126;\|&#