From 7372f56054259457f77c64cbdb34e736531cfc0e Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 4 Jul 2009 11:57:29 -0400 Subject: move lib to version 1 directory, (lib/sisu/v1) and make related changes --- lib/sisu/v0/plaintext.rb | 448 ----------------------------------------------- 1 file changed, 448 deletions(-) delete mode 100644 lib/sisu/v0/plaintext.rb (limited to 'lib/sisu/v0/plaintext.rb') diff --git a/lib/sisu/v0/plaintext.rb b/lib/sisu/v0/plaintext.rb deleted file mode 100644 index dd2964d9..00000000 --- a/lib/sisu/v0/plaintext.rb +++ /dev/null @@ -1,448 +0,0 @@ -# coding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Download: - - - * Ralph Amissah - - - - ** Description: plaintext text generation, stripped plaintext output (unix, - linefeed) - -=end -module SiSU_Plaintext - require "#{SiSU_lib}/dal" - require "#{SiSU_lib}/sysenv" - include SiSU_Env - include SiSU_Param - include SiSU_Viz - require "#{SiSU_lib}/plaintext_format" - include SiSU_Plaintext_format - require "#{SiSU_lib}/shared_txt" - require "#{SiSU_lib}/shared_structure" - pwd=Dir.pwd - @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 - @@tablefoot='' - class Source - def initialize(opt) - @opt=opt - @@dostype=if @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/ - if @opt.mod.inspect =~ /--footnote/ \ - and @opt.mod.inspect =~ /--dos/ - 'msdos footnotes' - elsif @opt.mod.inspect =~ /--endnote/ \ - and @opt.mod.inspect =~ /--dos/ - 'msdos endnotes' - elsif @opt.mod.inspect =~ /--footnote/ - 'unix footnotes' - elsif @opt.mod.inspect =~ /--endnote/ - 'unix endnotes' - else 'unix footnotes' - end - else puts "#{sf} not a processed file type" - end - end - def read - begin - @md=SiSU_Param::Parameters.new(@opt).get - @env=SiSU_Env::Info_env.new(@opt.fns) - path=@env.path.output_tell - tool=if @opt.cmd =~/[MVv]/; "#{@env.program.text_editor} #{path}/#{@md.fnb}/#{@md.fn[:plain]}" - else '' - end - tell=SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',tool) - tell.green_hi_blue unless @opt.cmd =~/q/ - tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:plain]}") - tell.flow if @opt.cmd =~/[MV]/ - my_make=SiSU_Env::Create_file.new(@opt.cmd,@opt.fns) - @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here - SiSU_Plaintext::Source::Scroll.new(@dal_array,@md).songsheet - SiSU_Env::Info_skin.new(@md).select #watch - rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error - ensure - end - end - private - class Scroll [],:end=>[] } - @@dp=nil - def initialize(data,md) - @data,@md=data,md - @url_brace=SiSU_Viz::Skin.new.url_decoration - @vz=SiSU_Env::Get_init.instance.skin - @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern - @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m # 2004w18 pb pn removal added - @tab="\t" - @br=if md.mod.inspect =~ /--footnote/ \ - and md.mod.inspect =~ /--dos/ - @@dostype='msdos footnotes' - "\r\n" - elsif md.mod.inspect =~ /--endnote/ \ - and md.mod.inspect =~ /--dos/ - @@dostype='msdos endnotes' - "\r\n" - elsif md.mod.inspect =~ /--footnote/ - @@dostype='unix footnotes' - "\n" - elsif md.mod.inspect =~ /--endnote/ - @@dostype='unix endnotes' - "\n" - else - @@dostype='unix footnotes' - "\n" - end - @plaintext={ :body=>[],:open=>[],:close=>[],:head=>[],:metadata=>[],:tail=>[] } - end - def songsheet - plaintext=markup(@data) - publish(plaintext) - end - # Used for extraction of endnotes from paragraphs - def extract_endnotes(para='') - notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) - @n=[] - notes.flatten.each do |n| #high cost to deal with
appropriately within plaintext, consider - n=n.dup.to_s - if n =~/#{Mx[:br_line]}|#{Mx[:br_nl]}/ - fix = n.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/) #watch #added - fix.each do |x| - unless x.empty?; @n << x - end - end - else @n << n - end - end - notes=@n.flatten - notes.each do |e| - util=if e.to_s =~/^\[[\d*+]+\]:/; SiSU_text_utils::Wrap.new(e.to_s,78,4,1) - else SiSU_text_utils::Wrap.new(e.to_s,78,1,1) - end - wrap=util.line_wrap - if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m - wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m, < 78 - @plaintext[:body] << case lv - when 1; wrapped.upcase << @br << '*'*times << @br - when 2..3; wrapped.upcase << @br << '='*times << @br - when 4; wrapped.upcase << @br << '-'*times << @br - when 5..6; wrapped.upcase << @br << '.'*times << @br - end - else - @plaintext[:body] << wrapped << @br # main text, contents, body KEEP - end - if @@endnotes[:para] \ - and @@dostype =~/footnote/ #edit out to switch off endnotes following paragraph to which they belong - @plaintext[:body] << @br - @@endnotes[:para].each {|e| @plaintext[:body] << e << @br} - elsif @@endnotes[:para] \ - and @@dostype =~/endnote/ - @plaintext[:body] << @br*2 - end - @@endnotes[:para]=[] - end - def markup(data) # Used for major markup instructions - dir=SiSU_Env::Info_env.new(@md.fns) - @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]} - (0..6).each { |x| @cont[x]=@level[x]=false } - (4..6).each { |x| @plaintext_contents_close[x]='' } - plaintext_tail #($1,$2) - table_message='[table omitted, see other document formats]' - fix=[] - data.each do |para| - para.gsub!(/#{Mx[:id_o]}~0;0:0;x\d+#{Mx[:id_c]}/,'') # if book index? remove - para.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#@br#{table_message}") - para.gsub!(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,'') # remove dummy headings (used by html) #check - para.gsub!(/#{Mx[:gl_bullet]}\s*/,'* ') # bullet markup, marked down - para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*\1*') - para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/\1/') - para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'[\1]') - para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_\1_') - para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^\1^') - para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+\1+') - para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"\1"') - para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'-\1-') - unless para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ - para.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}((?:https?|file|ftp):\/\/\S+|image)/,'\1 [link:] \2') - para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") - para.gsub!(/_((?:https?|file|ftp):\/\/\S+)/,'\1') - extract_endnotes(para) - para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]') # endnote marker marked up - para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]') # endnote marker marked up - para.gsub!(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<') - para.gsub!(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>') - para.gsub!(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&') - para.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!') - para.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#') - para.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*') - para.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-') - para.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/') - para.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_') - para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{') - para.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}') - para.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~') - para.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©') - end - if para =~/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/ ##{Mx[:gr_o]}codeline#{Mx[:gr_c]} - if para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code-block: angle brackets special characters - para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _< - para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_< - end - para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n") # watch - para.gsub!(/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,'') - else para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n") # watch introduces a bug - end - para.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'') # remove page breaks - para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') # remove empty lines - check - para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') - para.gsub!(/(.+?)<\/a>/m,'\1') - para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links - para.gsub!(/ |#{Mx[:nbsp]}/,' ') # decide on - para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") - para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') - #para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') - wordlist=para.scan(/\S+/) - if para =~/^#{Rx[:meta]}\s*(.+?)\Z/m # for headers - d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta - if d_meta; plaintext_metadata(d_meta) - end - end - if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ - if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change - paranum=para[@regx,3] - @p_num=SiSU_Plaintext_format::Paragraph_number.new(paranum) - end - @sto=SiSU_Structure::Split_text_object.new(@md,para).txt - ### problem in scroll, it appears tables are getting paragraph numbers - m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ - if para =~m \ - and para=~/\S+/ - para=case @sto.format - when /^(1):(\S*?)/ - plaintext_structure(para,$1,@sto.ocn,$2) - @sto.lev_para_ocn.heading_body1 - when /^(2):(\S*?)/ - plaintext_structure(para,$1,@sto.ocn,$2) - @sto.lev_para_ocn.heading_body2 - when /^(3):(\S*?)/ - plaintext_structure(para,$1,@sto.ocn,$2) - @sto.lev_para_ocn.heading_body3 - when /^(4):(\S+?)/ # work on see SiSU_text_parts::Split_text_object - plaintext_structure(para,$1,@sto.ocn,$2) - @sto.lev_para_ocn.heading_body4 - when /^(5):(\S*?)/ - plaintext_structure(para,$1,@sto.ocn,$2) - @sto.lev_para_ocn.heading_body5 - when /^(6):(\S*?)/ - plaintext_structure(para,$1,@sto.ocn,$2) - @sto.lev_para_ocn.heading_body6 - else - plaintext_structure(para,nil,nil,nil) #watch may be problematic - para - end - elsif para =~/#{table_message}/ - @plaintext[:body] << para << @br - elsif para =~/(Note|Endnotes?)/ \ - and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ - elsif para =~/(MetaData)/ \ - and para =~/#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info ####suspect visit - #formatMono=MonoSiSU.new('
MetaData') - #para=formatMono.bold_para - elsif para.include? 'Owner Details' \ - and para !~/#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ - #formatMono=MonoSiSU.new('
Owner Details') - #@plaintext[:owner_details]=formatMono.bold_para - #para='' - elsif para =~/(#{Mx[:tc_p]}|#{Mx[:gr_o]}Th?)/u #tables ! check - end - para='' if (para =~// \ - and para =~/^(-\{{2}~\d+|)/) # -endnote - case para - when /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/ - if para =~/.*<:#>.*$/m - txt_obj={:txt =>para} - format_text=Format_text_object.new(@md,txt_obj) - para=format_text.scr_indent_one_no_paranum - end - end - if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ - # i don't get the condition for no paranum - end - #if para =~/<:center>/ - # one,two=/(.*)<:center>(.*)/.match(para)[1,2] - # format_text=Format_text_object.new(one,two) - # para=format_text.center - #end - para.gsub!(/#{Mx[:id_o]}.+?#{Mx[:id_c]}/,' ') if para ## Clean Prepared Text - para.gsub!(//,' ') if para ## Clean Prepared Text - para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text - end - end - @plaintext - end - def publish(plaintext) - divider='=' - content=[] - content << plaintext[:open] - content << plaintext[:head] - content << plaintext[:body] - content << @@endnotes[:end] if @@dostype =~/endnotes/ - content << "#@br#{divider*78}#@br" - content << plaintext[:metadata] - content << "#@br#{divider*78}#@br" if @md.stmp =~/\w+/ #not used? - content << plaintext[:owner_details] if @md.stmp =~/\w+/ #not used? - content << plaintext[:tail] - Output.new(content,@md).plaintext - @@endnotes={ :para=>[],:end=>[] } - end - end - class Output 0 - para.each do |line| - line.gsub!(/\s+$/m,'') - file_plaintext.puts line #unix plaintext - end - else file_plaintext.puts para #unix plaintext # /^([*=-]|\.){5}/ - end - end - file_plaintext.close - end - end - end -end -__END__ -!\|#\|&*\|-\|/\|_\|{\|}\|~\|&# -- cgit v1.2.3