From 7372f56054259457f77c64cbdb34e736531cfc0e Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 4 Jul 2009 11:57:29 -0400 Subject: move lib to version 1 directory, (lib/sisu/v1) and make related changes --- lib/sisu/v1/plaintext.rb | 448 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 448 insertions(+) create mode 100644 lib/sisu/v1/plaintext.rb (limited to 'lib/sisu/v1/plaintext.rb') diff --git a/lib/sisu/v1/plaintext.rb b/lib/sisu/v1/plaintext.rb new file mode 100644 index 00000000..dd2964d9 --- /dev/null +++ b/lib/sisu/v1/plaintext.rb @@ -0,0 +1,448 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: plaintext text generation, stripped plaintext output (unix, + linefeed) + +=end +module SiSU_Plaintext + require "#{SiSU_lib}/dal" + require "#{SiSU_lib}/sysenv" + include SiSU_Env + include SiSU_Param + include SiSU_Viz + require "#{SiSU_lib}/plaintext_format" + include SiSU_Plaintext_format + require "#{SiSU_lib}/shared_txt" + require "#{SiSU_lib}/shared_structure" + pwd=Dir.pwd + @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 + @@tablefoot='' + class Source + def initialize(opt) + @opt=opt + @@dostype=if @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/ + if @opt.mod.inspect =~ /--footnote/ \ + and @opt.mod.inspect =~ /--dos/ + 'msdos footnotes' + elsif @opt.mod.inspect =~ /--endnote/ \ + and @opt.mod.inspect =~ /--dos/ + 'msdos endnotes' + elsif @opt.mod.inspect =~ /--footnote/ + 'unix footnotes' + elsif @opt.mod.inspect =~ /--endnote/ + 'unix endnotes' + else 'unix footnotes' + end + else puts "#{sf} not a processed file type" + end + end + def read + begin + @md=SiSU_Param::Parameters.new(@opt).get + @env=SiSU_Env::Info_env.new(@opt.fns) + path=@env.path.output_tell + tool=if @opt.cmd =~/[MVv]/; "#{@env.program.text_editor} #{path}/#{@md.fnb}/#{@md.fn[:plain]}" + else '' + end + tell=SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',tool) + tell.green_hi_blue unless @opt.cmd =~/q/ + tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:plain]}") + tell.flow if @opt.cmd =~/[MV]/ + my_make=SiSU_Env::Create_file.new(@opt.cmd,@opt.fns) + @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here + SiSU_Plaintext::Source::Scroll.new(@dal_array,@md).songsheet + SiSU_Env::Info_skin.new(@md).select #watch + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + end + end + private + class Scroll [],:end=>[] } + @@dp=nil + def initialize(data,md) + @data,@md=data,md + @url_brace=SiSU_Viz::Skin.new.url_decoration + @vz=SiSU_Env::Get_init.instance.skin + @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern + @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m # 2004w18 pb pn removal added + @tab="\t" + @br=if md.mod.inspect =~ /--footnote/ \ + and md.mod.inspect =~ /--dos/ + @@dostype='msdos footnotes' + "\r\n" + elsif md.mod.inspect =~ /--endnote/ \ + and md.mod.inspect =~ /--dos/ + @@dostype='msdos endnotes' + "\r\n" + elsif md.mod.inspect =~ /--footnote/ + @@dostype='unix footnotes' + "\n" + elsif md.mod.inspect =~ /--endnote/ + @@dostype='unix endnotes' + "\n" + else + @@dostype='unix footnotes' + "\n" + end + @plaintext={ :body=>[],:open=>[],:close=>[],:head=>[],:metadata=>[],:tail=>[] } + end + def songsheet + plaintext=markup(@data) + publish(plaintext) + end + # Used for extraction of endnotes from paragraphs + def extract_endnotes(para='') + notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) + @n=[] + notes.flatten.each do |n| #high cost to deal with
appropriately within plaintext, consider + n=n.dup.to_s + if n =~/#{Mx[:br_line]}|#{Mx[:br_nl]}/ + fix = n.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/) #watch #added + fix.each do |x| + unless x.empty?; @n << x + end + end + else @n << n + end + end + notes=@n.flatten + notes.each do |e| + util=if e.to_s =~/^\[[\d*+]+\]:/; SiSU_text_utils::Wrap.new(e.to_s,78,4,1) + else SiSU_text_utils::Wrap.new(e.to_s,78,1,1) + end + wrap=util.line_wrap + if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m + wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m, < 78 + @plaintext[:body] << case lv + when 1; wrapped.upcase << @br << '*'*times << @br + when 2..3; wrapped.upcase << @br << '='*times << @br + when 4; wrapped.upcase << @br << '-'*times << @br + when 5..6; wrapped.upcase << @br << '.'*times << @br + end + else + @plaintext[:body] << wrapped << @br # main text, contents, body KEEP + end + if @@endnotes[:para] \ + and @@dostype =~/footnote/ #edit out to switch off endnotes following paragraph to which they belong + @plaintext[:body] << @br + @@endnotes[:para].each {|e| @plaintext[:body] << e << @br} + elsif @@endnotes[:para] \ + and @@dostype =~/endnote/ + @plaintext[:body] << @br*2 + end + @@endnotes[:para]=[] + end + def markup(data) # Used for major markup instructions + dir=SiSU_Env::Info_env.new(@md.fns) + @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]} + (0..6).each { |x| @cont[x]=@level[x]=false } + (4..6).each { |x| @plaintext_contents_close[x]='' } + plaintext_tail #($1,$2) + table_message='[table omitted, see other document formats]' + fix=[] + data.each do |para| + para.gsub!(/#{Mx[:id_o]}~0;0:0;x\d+#{Mx[:id_c]}/,'') # if book index? remove + para.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#@br#{table_message}") + para.gsub!(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,'') # remove dummy headings (used by html) #check + para.gsub!(/#{Mx[:gl_bullet]}\s*/,'* ') # bullet markup, marked down + para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*\1*') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/\1/') + para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'[\1]') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_\1_') + para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^\1^') + para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+\1+') + para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"\1"') + para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'-\1-') + unless para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ + para.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}((?:https?|file|ftp):\/\/\S+|image)/,'\1 [link:] \2') + para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") + para.gsub!(/_((?:https?|file|ftp):\/\/\S+)/,'\1') + extract_endnotes(para) + para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]') # endnote marker marked up + para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]') # endnote marker marked up + para.gsub!(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<') + para.gsub!(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>') + para.gsub!(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&') + para.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!') + para.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#') + para.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*') + para.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-') + para.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/') + para.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_') + para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{') + para.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}') + para.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~') + para.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©') + end + if para =~/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/ ##{Mx[:gr_o]}codeline#{Mx[:gr_c]} + if para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code-block: angle brackets special characters + para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _< + para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_< + end + para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n") # watch + para.gsub!(/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,'') + else para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n") # watch introduces a bug + end + para.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'') # remove page breaks + para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') # remove empty lines - check + para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') + para.gsub!(/(.+?)<\/a>/m,'\1') + para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links + para.gsub!(/ |#{Mx[:nbsp]}/,' ') # decide on + para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") + para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') + #para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') + wordlist=para.scan(/\S+/) + if para =~/^#{Rx[:meta]}\s*(.+?)\Z/m # for headers + d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta + if d_meta; plaintext_metadata(d_meta) + end + end + if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ + if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change + paranum=para[@regx,3] + @p_num=SiSU_Plaintext_format::Paragraph_number.new(paranum) + end + @sto=SiSU_Structure::Split_text_object.new(@md,para).txt + ### problem in scroll, it appears tables are getting paragraph numbers + m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + if para =~m \ + and para=~/\S+/ + para=case @sto.format + when /^(1):(\S*?)/ + plaintext_structure(para,$1,@sto.ocn,$2) + @sto.lev_para_ocn.heading_body1 + when /^(2):(\S*?)/ + plaintext_structure(para,$1,@sto.ocn,$2) + @sto.lev_para_ocn.heading_body2 + when /^(3):(\S*?)/ + plaintext_structure(para,$1,@sto.ocn,$2) + @sto.lev_para_ocn.heading_body3 + when /^(4):(\S+?)/ # work on see SiSU_text_parts::Split_text_object + plaintext_structure(para,$1,@sto.ocn,$2) + @sto.lev_para_ocn.heading_body4 + when /^(5):(\S*?)/ + plaintext_structure(para,$1,@sto.ocn,$2) + @sto.lev_para_ocn.heading_body5 + when /^(6):(\S*?)/ + plaintext_structure(para,$1,@sto.ocn,$2) + @sto.lev_para_ocn.heading_body6 + else + plaintext_structure(para,nil,nil,nil) #watch may be problematic + para + end + elsif para =~/#{table_message}/ + @plaintext[:body] << para << @br + elsif para =~/(Note|Endnotes?)/ \ + and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + elsif para =~/(MetaData)/ \ + and para =~/#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info ####suspect visit + #formatMono=MonoSiSU.new('
MetaData') + #para=formatMono.bold_para + elsif para.include? 'Owner Details' \ + and para !~/#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ + #formatMono=MonoSiSU.new('
Owner Details') + #@plaintext[:owner_details]=formatMono.bold_para + #para='' + elsif para =~/(#{Mx[:tc_p]}|#{Mx[:gr_o]}Th?)/u #tables ! check + end + para='' if (para =~// \ + and para =~/^(-\{{2}~\d+|)/) # -endnote + case para + when /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/ + if para =~/.*<:#>.*$/m + txt_obj={:txt =>para} + format_text=Format_text_object.new(@md,txt_obj) + para=format_text.scr_indent_one_no_paranum + end + end + if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ + # i don't get the condition for no paranum + end + #if para =~/<:center>/ + # one,two=/(.*)<:center>(.*)/.match(para)[1,2] + # format_text=Format_text_object.new(one,two) + # para=format_text.center + #end + para.gsub!(/#{Mx[:id_o]}.+?#{Mx[:id_c]}/,' ') if para ## Clean Prepared Text + para.gsub!(//,' ') if para ## Clean Prepared Text + para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text + end + end + @plaintext + end + def publish(plaintext) + divider='=' + content=[] + content << plaintext[:open] + content << plaintext[:head] + content << plaintext[:body] + content << @@endnotes[:end] if @@dostype =~/endnotes/ + content << "#@br#{divider*78}#@br" + content << plaintext[:metadata] + content << "#@br#{divider*78}#@br" if @md.stmp =~/\w+/ #not used? + content << plaintext[:owner_details] if @md.stmp =~/\w+/ #not used? + content << plaintext[:tail] + Output.new(content,@md).plaintext + @@endnotes={ :para=>[],:end=>[] } + end + end + class Output 0 + para.each do |line| + line.gsub!(/\s+$/m,'') + file_plaintext.puts line #unix plaintext + end + else file_plaintext.puts para #unix plaintext # /^([*=-]|\.){5}/ + end + end + file_plaintext.close + end + end + end +end +__END__ +!\|#\|&*\|-\|/\|_\|{\|}\|~\|&# -- cgit v1.2.3