diff options
author | Ralph Amissah <ralph@amissah.com> | 2012-01-10 22:37:26 -0500 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2012-01-10 22:42:20 -0500 |
commit | 75e3bf86382edf99275a25895b362647158e25c1 (patch) | |
tree | 7ec458f15d0bf981c7e044244a8cbf55205141b7 /lib/sisu/v3dv/db_sqltxt.rb | |
parent | v3: date, year 2012, update (diff) |
v3dv, add dev branch (use to make some changes to module & class names & test)
* (intended as) short term branch, merge back into v3 once tested
* sisu --dev (to invoke)
Diffstat (limited to 'lib/sisu/v3dv/db_sqltxt.rb')
-rw-r--r-- | lib/sisu/v3dv/db_sqltxt.rb | 132 |
1 files changed, 132 insertions, 0 deletions
# encoding: utf-8
# diff --git a/lib/sisu/v3dv/db_sqltxt.rb b/lib/sisu/v3dv/db_sqltxt.rb (new file mode 100644, index 00000000..d1705c2b)
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: text preparation for database (SQL) output: quote/backslash
    escaping, markup stripping and searchable plain text

=end
module SiSU_DB_text
  # Text clean-up helpers used when preparing document text for SQL output.
  #
  # Several patterns interpolate entries of Mx, a marker table that is defined
  # elsewhere in the project (not in this file); its values are inserted into
  # the regular expressions unescaped.
  #
  # @md (document metadata) is read by clean_searchable_text but is never
  # assigned in this class; presumably subclasses set it -- confirm against
  # callers.
  class Prepare
    # Escapes +str+ IN PLACE for use inside an SQL string literal and
    # simplifies link markup, then returns the same String object.
    #
    # * single quotes are doubled, backslashes are doubled
    #   (original note: ok but with warnings, double backslash on sqlite)
    # * line-break markers become "<br />\n"
    # * tag markers are removed
    # * png/jpg image links become "[image: name] caption"
    # * file/ftp links and url-wrapped links are reduced to their link text
    def special_character_escape(str)
      str.gsub!(/'/,"''")
      str.gsub!(/(\\)/m,'\1\1')
      str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n")
      str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
      str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2')
      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1')
      str
    end

    # Produces clean, searchable plain text from document source.
    #
    # +arr+ is either a String (split on runs of newlines) or a collection of
    # String lines; when a collection is given its elements are modified in
    # place. Inline emphasis, block openers/closers, header lines, heading and
    # indent markers and comment lines are removed; endnote bodies (~{ ... }~)
    # are pulled out of the running text and appended after it. The joined
    # result is passed through special_character_escape and returned.
    def clean_searchable_text(arr)
      lines=arr.is_a?(String) ? arr.split(/\n+/m) : arr
      endnotes=[]
      lines.each do |s|
        s.gsub!(/([*\/_-])\{(.+?)\}\1/m,'\2')
        s.gsub!(/^(?:block|group|poem|code)\{/m,'')
        s.gsub!(/^\}(?:block|group|poem|code)/m,'')
        s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'')
        fill_heading_placeholders(s) if s =~/^:A~/
        s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/m,'')
        s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/m,'')
        s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/m,'')
        s.gsub!(/^%{1,3} .+/m,'') #removed even if contained in code block
        s.gsub!(/<br>/m,' ')
        endnotes << s.scan(/~\{\s*(.+?)\s*\}~/m) # collect before deleting them below
        s.gsub!(/~\{.+?\}~/m,'')
        s.gsub!(/ \s+/m,' ')
      end
      special_character_escape([lines,endnotes].flatten.join("\n"))
    end

    # Strips presentation markup from +str+ IN PLACE and returns it.
    #define rules, make same as in dal clean
    #
    # Footnote superscripts become "[n]", non-breaking-space markers and table
    # cell markers become spaces, anything in angle brackets is dropped, image
    # links are replaced by "[image]" (else image names found in search),
    # whitespace runs are collapsed and the ends trimmed.
    def strip_markup(str)
      str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]')
      str.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ')
      str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables
      str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables
      str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later
      str.gsub!(/<.+?>/,'')
      # scheme separator is "://" (was written "//:" and so never matched)
      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+ /,' [image] ')
      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]')
      str.gsub!(/\s\s+/,' ')
      str.strip!
      str
    end

    # Returns the distinct "words" of +str+ (runs of two or more of
    # a-z A-Z 0-9 \ / _ -), sorted and joined by single spaces.
    def unique_words(str)
      str.scan(/[a-zA-Z0-9\\\/_-]{2,}/).uniq.sort.join(' ')
    end

    private

    # Replaces the @author and @title placeholders of a ":A~" heading line
    # with values from @md, warning about each value that is missing.
    # Does nothing when no metadata object is set, so the class can be used
    # on its own without raising on nil.
    def fill_heading_placeholders(s)
      return if @md.nil?
      if defined?(@md.creator) &&
         defined?(@md.creator.author) &&
         !@md.creator.author.empty?
        s.gsub!(/@author/,@md.creator.author)
      else
        warn_unless_quiet('WARNING Document Author information missing; provide @creator: :author:')
      end
      if defined?(@md.title) &&
         defined?(@md.title.full) &&
         !@md.title.full.empty?
        s.gsub!(/@title/,@md.title.full)
      else
        warn_unless_quiet('WARNING Document Title missing; provide @title:')
      end
    end

    # Emits +message+ through SiSU_Screen::Ansi unless the command options
    # held in @md contain "q".
    def warn_unless_quiet(message)
      return if @md.opt.cmd.inspect =~/q/
      SiSU_Screen::Ansi.new('v',message,@md.fnb).warn
    end
  end
end