diff options
author | Ralph Amissah <ralph@amissah.com> | 2011-02-01 09:48:30 -0500 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2011-02-01 16:55:05 -0500 |
commit | 4b51bc00cda70d3c118401a74f1704df38c947a3 (patch) | |
tree | 8284fec609798d100b4663e42a842cb37cab985f /lib/sisu/v3/shared_markup_alt.rb | |
parent | prepare for v3 branch (diff) |
v3 introduced as development branch, invoked using "sisu --v3 [instructions]"
Diffstat (limited to 'lib/sisu/v3/shared_markup_alt.rb')
-rw-r--r-- | lib/sisu/v3/shared_markup_alt.rb | 320 |
1 files changed, 320 insertions, 0 deletions
# coding: utf-8
# lib/sisu/v3/shared_markup_alt.rb (added as a new file; blob cbd1846b)
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search
 #___#

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/copyleft/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: system environment, resource control and configuration details

=end
module SiSU_text_representation
  # Rewrites the internal marker strings of a piece of document text into
  # other representations: whitespace-tidied, markup-free, SiSU-like source
  # markup, or light HTML.
  #
  # All marker strings are read from the Mx table, which is defined outside
  # this file.
  class Alter
    # Glyph codes (regexp source, found between Mx[:gl_o] and Mx[:gl_c]) and
    # the plain character each one stands for in markup-free text.
    GLYPH_TEXT=[
      ['(?:#lt|#060)','<'],
      ['(?:#gt|#062)','>'],
      ['#(?:038|amp)','&'],
      ['#033','!'],
      ['#035','#'],
      ['#042','*'],
      ['#045','-'],
      ['#047','/'],
      ['#095','_'],
      ['#123','{'],
      ['#125','}'],
      ['#169','©'],
    ].freeze
    # x:: either a String (the text itself), or a document object whose #obj
    #     returns the text. Only the latter gives access to object details
    #     (#is, #lv, #bullet_, #indent) used by the methods below.
    def initialize(x)
      if x.class==String
        @t_o,@s=nil,x
      else
        @t_o,@s=x,x.obj.dup
      end
    end
    # Collapses runs of spaces and removes spaces before punctuation (the
    # latter is skipped when the text contains an endnote opener).
    # Returns the tidied text.
    def strip_clean_of_extra_spaces # dal output tuned
      s=@s.dup
      unless s =~/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/
        s=s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1')
      end
      s=s.gsub(/ [ ]+/,' ')
      # NOTE(review): once runs of spaces are collapsed above, the next two
      # patterns (two or more spaces at a line edge) can no longer match;
      # kept as written - confirm whether a single-space strip was intended.
      s=s.gsub(/^ [ ]+/,'')
      s=s.gsub(/ [ ]+$/,'')
      # close up e.g. <bold>word</bold>' s  ->  <bold>word</bold>'s
      @s=s.gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2')
    end
    # Returns the text with font attributes, endnotes, break markers and glyph
    # codes reduced to plain text.
    # text form used in sql db search, used for digest, define rules, make same as in db clean
    def strip_clean_of_markup
      @s=@s.dup #% same as db clean -->
      %w[bold italics underscore cite insert strike].each do |name|
        replace_font_attribute(name,'\1')
      end
      # purely numeric superscripts are kept, bracketed
      @s=@s.gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]')
      %w[superscript subscript hilite].each do |name|
        replace_font_attribute(name,'\1')
      end
      @s=@s.gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~')
      @s=@s.gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'') # endnote removed
      @s=@s.gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'') # endnote removed
      @s=@s.gsub(/(?:#{Mx[:nbsp]})+/,' ')
      @s=@s.gsub(/(?:#{Mx[:br_nl]})+/,"\n")
      @s=@s.gsub(/(?:#{Mx[:br_paragraph]})+/,"\n")
      @s=@s.gsub(/(?:#{Mx[:br_line]})+/,"\n")
      GLYPH_TEXT.each do |code,char|
        @s=@s.gsub(/#{Mx[:gl_o]}#{code}#{Mx[:gl_c]}/,char)
      end
      @s=@s.gsub(/\s\s+/,' ')
      @s=@s.strip
    end
    # Returns the text with internal markers turned back into SiSU-style
    # source markup (*{bold}*, /{italics}/, ~{ endnote }~, heading level and
    # bullet/indent prefixes, block wrappers).
    # Needs the document object: when built from a String the text is returned
    # unchanged.
    # used for digest, define rules, make same as in db clean
    def semi_revert_markup
      return @s unless @t_o
      [
        ['bold','*{\1}*'],
        ['italics','/{\1}/'],
        ['underscore','_{\1}_'],
        ['cite','"{\1}"'],
        ['insert','+{\1}+'],
        ['strike','-{\1}-'],
        ['superscript','^{\1}^'],
        ['subscript',',{\1},'],
      ].each do |name,markup|
        replace_font_attribute(name,markup)
      end
      @s=@s.gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~')
      @s=@s.gsub(/#{Mx[:en_a_o]}([\d*+]+\s+.+?)#{Mx[:en_a_c]}/,'~{\1}~') # endnote marker marked up
      @s=@s.gsub(/#{Mx[:en_b_o]}([\d*+]+\s+.+?)#{Mx[:en_b_c]}/,'~[\1]~') # endnote marker marked up
      case @t_o.is
      when 'heading','para'
        @s=@s.gsub(/ [ ]+/,' ')
        @s=@s.gsub(/(?:#{Mx[:nbsp]})+/,' ')
        if @t_o.is=='heading'
          @s=@t_o.lv + '~ ' + @s
        else
          @s='_* ' + @s if @t_o.bullet_
          if @t_o.indent.to_i > 0
            @s="_#{@t_o.indent} " + @s
            # an indented bullet is written _1* rather than _1 _*
            @s=@s.gsub(/^(_[1-9])\s_\*\s/,'\1* ')
          end
        end
      when 'block','group','code'
        @s=@s.gsub(/#{Mx[:nbsp]}/,' ')
        @s="#{@t_o.is}{\n\n#{@s}\n\n}#{@t_o.is}"
        @s=@s.gsub(/(?:#{Mx[:br_nl]}|\n)+/m,"\n\n")
      end
      #dealing with poem and verse calls for change in dal, where start and end verse of poem are marked as such
      @s=@s.strip
    end
    # Returns the text as light HTML. Needs the document object: when built
    # from a String the source location is printed and the text is returned
    # unchanged.
    #test whether eventually can be used in db_import replacing shared_html_lite (search for SiSU_Format_Shared)
    def html_lite
      unless @t_o
        p __FILE__ +':'+ __LINE__.to_s
        return @s
      end
      [
        ['bold','<b>\1</b>'],
        ['italics','<i>\1</i>'],
        ['underscore','<u>\1</u>'],
        ['cite','"\1"'],
        ['insert','+{\1}+'],
        ['strike','-{\1}-'],
        ['superscript','<sup>\1</sup>'],
        ['subscript','<sub>\1</sub>'],
      ].each do |name,html|
        replace_font_attribute(name,html)
      end
      @s=@s.gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~')
      if @t_o.is !='code'
        if @s =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/
          wm=@s.scan(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)|\S+/)
          # NOTE(review): #urls is not defined in this class - confirm where it
          # is expected to come from before relying on linked text/images here.
          words=urls(wm)
          @s=@s.gsub(/.+/m,words)
        end
        @s=@s.gsub(/#{Mx[:gl_o]}(#[0-9]{3})#{Mx[:gl_c]}/u,'&\1;')
        @s=@s.gsub(/#{Mx[:gl_o]}#([a-z]{2,4})#{Mx[:gl_c]}/u,'&\1;')
        @s=@s.gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'<a href="\1" target="_top">\1</a>') #http ftp matches escaped, no decoration
        @s=@s.gsub(/(#{Mx[:lnk_c]})#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1<a href="\2" target="_top">\2</a>') #special case \{ e.g. \}http://url
        # @url_brace is never assigned in this class; without it urls are
        # linked with no surrounding decoration instead of raising on nil.
        # NOTE(review): confirm where the url decoration should be configured.
        deco_open=(@url_brace ? @url_brace.xml_open : '')
        deco_close=(@url_brace ? @url_brace.xml_close : '')
        @s=@s.gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,%{#{deco_open}<a href="\\1" target="_top">\\1</a>#{deco_close}}) #http ftp matches with decoration
      else
        #code-block: angle brackets special characters
        # NOTE(review): the replacements are bare angle brackets as found in
        # the source; HTML entities may have been intended - confirm.
        @s=@s.gsub(/(^|[^}])_</m,'\1<')
        @s=@s.gsub(/(^|[^}])_>/m,'\1>')
      end
      # paragraphs and headings currently need no further treatment
      @s
    end
    private
    # Replaces every non-greedy span between the opening and closing markers of
    # font attribute +name+ (Mx[:fa_<name>_o] .. Mx[:fa_<name>_c]) with
    # +replacement+, in which \1 is the enclosed text. Deriving both keys from
    # one name keeps opener and closer in step.
    def replace_font_attribute(name,replacement)
      opener=Mx["fa_#{name}_o".to_sym]
      closer=Mx["fa_#{name}_c".to_sym]
      @s=@s.gsub(/#{opener}(.+?)#{closer}/,replacement)
    end
  end
  # Pairs alternative text representations (see Alter) with hex digests of
  # them; sha256 or md5 is chosen from the environment's digest type.
  #
  # NOTE(review): #strip_clean_of_markup, #semi_revert_markup and #composite
  # each (re)define helper methods such as #txt and #dgst on the class when
  # they run, so the mode selected most recently - on any instance - decides
  # what those helpers do. Call the helper directly on the returned object,
  # e.g. x.composite.dgst.
  class Modified_text_plus_Hash_digest
    # md:: document metadata object; #fns and #cmd are read from it
    # x::  either a String, or a document object whose #obj returns the text
    def initialize(md,x)
      @md=md
      if x.class==String
        @t_o,@s=nil,x
      else
        @t_o,@s=x,x.obj.dup
      end
      @env ||=SiSU_Env::Info_env.new(@md.fns)
      @sha_=(@env.digest.type =='sha256')
      # only the digest library actually selected is loaded
      require(@sha_ ? 'digest/sha2' : 'digest/md5')
    end
    # Returns the hex digest of +txt+ (SHA256 when sha256 was selected,
    # otherwise MD5).
    def digest(txt)
      (@sha_ ? Digest::SHA256 : Digest::MD5).hexdigest(txt)
    end
    # Selects markup-free mode; afterwards #txt is the cleaned text and #dgst
    # is {:txt=>text,:dgst_txt=>digest of text}. Returns self.
    def strip_clean_of_markup
      def txt
        SiSU_text_representation::Alter.new(@s).strip_clean_of_markup
      end
      def dgst
        text=txt
        {:txt=>text,:dgst_txt=>digest(text)}
      end
      self
    end
    # Selects reverted-markup mode; afterwards #txt is the text in SiSU-style
    # markup and #dgst is {:txt=>text,:dgst_txt=>digest of text}. Returns self.
    def semi_revert_markup
      def txt
        # Alter#semi_revert_markup only reverts when given the document
        # object, so pass it when there is one (plain strings stay unchanged).
        SiSU_text_representation::Alter.new(@t_o || @s).semi_revert_markup
      end
      def dgst
        text=txt
        {:txt=>text,:dgst_txt=>digest(text)}
      end
      self
    end
    # Selects composite mode; afterwards #dgst gives the digests of a document
    # object's text (stripped and markup-reverted), its endnotes and its
    # images. Returns self.
    def composite
      # Markup-free form of +txt+ (String or document object).
      def stripped_clean(txt)
        SiSU_text_representation::Alter.new(txt).strip_clean_of_markup
      end
      # Markup-reverted form of +txt+ (String or document object).
      def markup_reverted(txt)
        SiSU_text_representation::Alter.new(txt).semi_revert_markup
      end
      # For each image file name in +imgs+ returns
      # {:img_dgst=>..,:img_name=>..,:img_type=>..}; a missing image is
      # reported (unless the command flags contain q), gets a nil digest and
      # its name is suffixed ' [image missing]'. Returns [] for nil/empty input.
      def images(imgs)
        return [] unless imgs && imgs.length > 0
        sys=SiSU_Env::System_call.new
        search_paths=[
          @env.path.image_source_include_local,
          @env.path.image_source_include_remote,
          @env.path.image_source_include,
        ]
        imgs.map do |i|
          image_source=search_paths.find {|dir| FileTest.file?("#{dir}/#{i}") }
          unless image_source
            SiSU_Screen::Ansi.new(@md.cmd,"ERROR - image:", %{"#{i}" missing}, "search locations: #{@env.path.image_source_include_local}, #{@env.path.image_source_include_remote} and #{@env.path.image_source_include}").error2 unless @md.cmd =~/q/
          end
          # tolerate names without a recognised extension (type is then nil)
          type_match=/\S+\.(png|jpg|gif)/.match(i)
          img_type=(type_match ? type_match[1] : nil)
          if image_source
            para_image=image_source + '/' + i
            # presumably the system call result holds the hex digest at
            # index 1 - TODO confirm against SiSU_Env::System_call
            image_dgst=(@sha_ ? sys.sha256(para_image) : sys.md5(para_image))[1]
            image_name=i
          else
            image_dgst=nil
            image_name=i + ' [image missing]'
          end
          {:img_dgst=>image_dgst,:img_name=>image_name,:img_type=>img_type}
        end
      end
      # For each endnote text in +en+ (nested arrays are flattened) returns
      # {:note_number=>..,:note_dgst=>..}. Returns [] for nil/empty input.
      def endnotes(en)
        return [] unless en && en.length > 0
        en.flatten.map do |e|
          note_no=e.gsub(/^([\d*+]+)\s+.+/,'\1')
          # NOTE(review): the note digest is the digest of the digest of the
          # cleaned note text; kept as is so stored digests stay comparable.
          note_dgst=digest(digest(stripped_clean(e)))
          {:note_number=>note_no,:note_dgst=>note_dgst}
        end
      end
      # Returns {:is,:ocn,:dgst_stripped_txt,:dgst_markedup_txt} plus
      # :endnotes and :images when present; nil for objects whose #of is
      # comment, structure or layout.
      def dgst
        return nil if %w[comment structure layout].include?(@t_o.of)
        rgx_notes=/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
        rgx_image=/#{Mx[:lnk_o]}(\S+\.(?:png|jpg|gif))\s.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/
        result={
          :is=>@t_o.is,
          :ocn=>@t_o.ocn,
          :dgst_stripped_txt=>digest(stripped_clean(@t_o)),
          :dgst_markedup_txt=>digest(markup_reverted(@t_o)),
        }
        endnotes_dgst=endnotes(@t_o.obj.scan(rgx_notes))
        line_image=nil
        if (@t_o.is=='para' || @t_o.is=='image') && @t_o.obj =~rgx_image
          line_image=images(@t_o.obj.scan(rgx_image).flatten)
        end
        result[:endnotes]=endnotes_dgst if endnotes_dgst.length > 0
        result[:images]=line_image if line_image && line_image.length > 0
        result
      end
      self
    end
  end
end