aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v2/db_sqltxt.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v2/db_sqltxt.rb')
-rw-r--r--lib/sisu/v2/db_sqltxt.rb134
1 files changed, 0 insertions, 134 deletions
diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb
deleted file mode 100644
index e1fbed29..00000000
--- a/lib/sisu/v2/db_sqltxt.rb
+++ /dev/null
@@ -1,134 +0,0 @@
-# coding: utf-8
-=begin
-
- * Name: SiSU
-
- * Description: a framework for document structuring, publishing and search
- #___#
-
- * Author: Ralph Amissah
-
- * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007, 2008, 2009, 2010 Ralph Amissah All Rights Reserved.
-
- * License: GPL 3 or later:
-
- SiSU, a framework for document structuring, publishing and search
-
- Copyright (C) Ralph Amissah
-
- This program is free software: you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the Free
- Software Foundation, either version 3 of the License, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program. If not, see <http://www.gnu.org/licenses/>.
-
- If you have Internet connection, the latest version of the GPL should be
- available at these locations:
- <http://www.fsf.org/licensing/licenses/gpl.html>
- <http://www.gnu.org/copyleft/gpl.html>
-
- <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
- <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
- <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
-
- * SiSU uses:
- * Standard SiSU markup syntax,
- * Standard SiSU meta-markup syntax, and the
- * Standard SiSU object citation numbering and system
-
- * Hompages:
- <http://www.jus.uio.no/sisu>
- <http://www.sisudoc.org>
-
- * Download:
- <http://www.jus.uio.no/sisu/SiSU/download.html>
-
- * Ralph Amissah
- <ralph@amissah.com>
- <ralph.amissah@gmail.com>
-
- ** Description: system environment, resource control and configuration details
-
-=end
-module SiSU_DB_text
- class Prepare
- def special_character_escape(str)
- str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
- str.gsub!(/(\\)/m,'\1\1') #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql
- str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n")
- str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
- str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2')
- str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
- str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1')
- str
- end
- def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source
- txt_arr,en=[],[]
- arr=arr.class==String ? arr.split(/\n+/m) : arr
- arr.each do |s|
- s.gsub!(/([*\/_-])\{(.+?)\}\1/m,'\2')
- s.gsub!(/^(?:group|poem|code)\{/m,''); s.gsub!(/^\}(?:group|poem|code)/m,'')
- s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'')
- if s =~/^:A~/
- if defined? @md.creator \
- and defined? @md.creator.author \
- and not @md.creator.author.empty?
- s.gsub!(/@author/,@md.creator.author)
- else
- SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:',@md.fnb).warn unless @md.cmd.inspect =~/q/
- end
- if defined? @md.title \
- and defined? @md.title.full \
- and not @md.title.full.empty?
- s.gsub!(/@title/,@md.title.full)
- else
- SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:',@md.fnb).warn unless @md.cmd.inspect =~/q/
- end
- end
- s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/m,'')
- s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/m,'')
- s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/m,'')
- s.gsub!(/^%{1,3} .+/m,'') #removed even if contained in code block
- s.gsub!(/<br>/m,' ')
- en << s.scan(/~\{\s*(.+?)\s*\}~/m)
- s.gsub!(/~\{.+?\}~/m,'')
- s.gsub!(/ \s+/m,' ')
- #special_character_escape(s)
- s
- end
- txt_arr << arr << en
- #txt_arr=txt_arr.flatten
- txt=txt_arr.flatten.join("\n")
- txt=special_character_escape(txt)
- txt
- end
- def strip_markup(str) #define rules, make same as in dal clean
- str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]')
- str.gsub!(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ')
- str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables
- str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables
- str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later
- str.gsub!(/<.+?>/,'')
- str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search
- str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search
- str.gsub!(/\s\s+/,' ')
- str.strip!
- str
- end
- def unique_words(str)
- a=str.scan(/[a-zA-Z0-9\\\/_-]{2,}/) #a=str.scan(/\S+{2,}/)
- str=a.uniq.sort.join(' ')
- str
- end
- end
-end
-__END__
-