diff options
author | Ralph Amissah <ralph@amissah.com> | 2012-10-03 00:11:08 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2012-10-03 00:11:08 -0400 |
commit | 804a103722aa7731ca7f2062ee2ebf533607e6aa (patch) | |
tree | a480caebb78925848807692c57c017b3ae5e6839 /lib/sisu/v2/db_sqltxt.rb | |
parent | v3: 3.3.3 version & changelog, dates touched (diff) |
v4: 4.0.0 new branch & version & changelog "opened"
Diffstat (limited to 'lib/sisu/v2/db_sqltxt.rb')
-rw-r--r-- | lib/sisu/v2/db_sqltxt.rb | 134 |
1 files changed, 0 insertions, 134 deletions
diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb deleted file mode 100644 index e1fbed29..00000000 --- a/lib/sisu/v2/db_sqltxt.rb +++ /dev/null @@ -1,134 +0,0 @@ -# coding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - #___# - - * Author: Ralph Amissah - - * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008, 2009, 2010 Ralph Amissah All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see <http://www.gnu.org/licenses/>. - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - <http://www.fsf.org/licensing/licenses/gpl.html> - <http://www.gnu.org/copyleft/gpl.html> - - <http://www.jus.uio.no/sisu/gpl.fsf/toc.html> - <http://www.jus.uio.no/sisu/gpl.fsf/doc.html> - <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt> - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - <http://www.jus.uio.no/sisu> - <http://www.sisudoc.org> - - * Download: - <http://www.jus.uio.no/sisu/SiSU/download.html> - - * Ralph Amissah - <ralph@amissah.com> - <ralph.amissah@gmail.com> - - ** Description: system environment, resource control and configuration details - -=end -module SiSU_DB_text - class Prepare - def special_character_escape(str) - str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") - str.gsub!(/(\\)/m,'\1\1') #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql - str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n") - str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check - str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2') - str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') - str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1') - str - end - def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source - txt_arr,en=[],[] - arr=arr.class==String ? arr.split(/\n+/m) : arr - arr.each do |s| - s.gsub!(/([*\/_-])\{(.+?)\}\1/m,'\2') - s.gsub!(/^(?:group|poem|code)\{/m,''); s.gsub!(/^\}(?:group|poem|code)/m,'') - s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'') - if s =~/^:A~/ - if defined? @md.creator \ - and defined? @md.creator.author \ - and not @md.creator.author.empty? - s.gsub!(/@author/,@md.creator.author) - else - SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:',@md.fnb).warn unless @md.cmd.inspect =~/q/ - end - if defined? @md.title \ - and defined? @md.title.full \ - and not @md.title.full.empty? - s.gsub!(/@title/,@md.title.full) - else - SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:',@md.fnb).warn unless @md.cmd.inspect =~/q/ - end - end - s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/m,'') - s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/m,'') - s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/m,'') - s.gsub!(/^%{1,3} .+/m,'') #removed even if contained in code block - s.gsub!(/<br>/m,' ') - en << s.scan(/~\{\s*(.+?)\s*\}~/m) - s.gsub!(/~\{.+?\}~/m,'') - s.gsub!(/ \s+/m,' ') - #special_character_escape(s) - s - end - txt_arr << arr << en - #txt_arr=txt_arr.flatten - txt=txt_arr.flatten.join("\n") - txt=special_character_escape(txt) - txt - end - def strip_markup(str) #define rules, make same as in dal clean - str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') - str.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') - str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables - str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables - str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later - str.gsub!(/<.+?>/,'') - str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search - str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search - str.gsub!(/\s\s+/,' ') - str.strip! - str - end - def unique_words(str) - a=str.scan(/[a-zA-Z0-9\\\/_-]{2,}/) #a=str.scan(/\S+{2,}/) - str=a.uniq.sort.join(' ') - str - end - end -end -__END__ - |