aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v2/db_sqltxt.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v2/db_sqltxt.rb')
-rw-r--r--lib/sisu/v2/db_sqltxt.rb115
1 files changed, 115 insertions, 0 deletions
diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb
new file mode 100644
index 00000000..f120b95f
--- /dev/null
+++ b/lib/sisu/v2/db_sqltxt.rb
@@ -0,0 +1,115 @@
+# coding: utf-8
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+ #___#
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010 Ralph Amissah All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/copyleft/gpl.html>
+
+ <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
+
+ * SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ * Hompages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+ * Download:
+ <http://www.jus.uio.no/sisu/SiSU/download.html>
+
+ * Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+ ** Description: system environment, resource control and configuration details
+
+=end
+module SiSU_DB_text
+ class Prepare
+ def special_character_escape(str)
+ str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
+ str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n")
+ str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
+ str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2')
+ str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
+ str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1')
+ str
+ end
+ def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source
+ txt_arr,en=[],[]
+ arr.each do |s|
+ s.gsub!(/([*\/_-])\{(.+?)\}\1/,'\2')
+ s.gsub!(/^(?:group|poem|code)\{/,''); s.gsub!(/^\}(?:group|poem|code)/,'')
+ s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'')
+ if s =~/^:A~/
+ s.gsub!(/@author/,@md.creator.author)
+ s.gsub!(/@title/,@md.title.full)
+ end
+ s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/,'')
+ s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/,'')
+ s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/,'')
+ s.gsub!(/^%{1,3} .+/,'') #removed even if contained in code block
+ s.gsub!(/<br>/,' ')
+ en << s.scan(/~\{\s*(.+?)\s*\}~/)
+ s.gsub!(/~\{.+?\}~/,'')
+ s.gsub!(/ \s+/,' ')
+ #special_character_escape(s)
+ s
+ end
+ txt_arr << arr << en
+ #txt_arr=txt_arr.flatten
+ txt=txt_arr.flatten.join("\n")
+ txt=special_character_escape(txt)
+ txt
+ end
+ def strip_markup(str) #define rules, make same as in dal clean
+ str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]')
+ str.gsub!(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ')
+ str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables
+ str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables
+ str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later
+ str.gsub!(/<.+?>/,'')
+ str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search
+ str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search
+ str.gsub!(/\s\s+/,' ')
+ str.strip!
+ str
+ end
+ end
+end
+__END__
+