aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/db_sqltxt.rb
diff options
context:
space:
mode:
authorRalph Amissah <ralph@amissah.com>2015-04-21 14:45:52 -0400
committerRalph Amissah <ralph@amissah.com>2015-05-01 18:49:41 -0400
commit960c3088bc88f2db879154053280b06c160d4d70 (patch)
tree624e14806190ac7edcd1400e8e2142ffba17afec /lib/sisu/db_sqltxt.rb
parentversion & changelog v7 (diff)
lib/sisu/*, single libs directory (c&d gone) (7)
* removed lib/sisu/{current,develop} dir branches v7 (v5 & v6 retired) * simplify dir structure, offer single version per snapshot * have enjoyed carrying stable and development versions v5 & v6 in a single tarball, may return to this structure
Diffstat (limited to 'lib/sisu/db_sqltxt.rb')
-rw-r--r--lib/sisu/db_sqltxt.rb173
1 files changed, 173 insertions, 0 deletions
diff --git a/lib/sisu/db_sqltxt.rb b/lib/sisu/db_sqltxt.rb
new file mode 100644
index 00000000..92d67f7d
--- /dev/null
+++ b/lib/sisu/db_sqltxt.rb
@@ -0,0 +1,173 @@
+# encoding: utf-8
+=begin
+
+* Name: SiSU
+
+** Description: documents, structuring, processing, publishing, search
+*** system environment, resource control and configuration details
+
+** Author: Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
+ All Rights Reserved.
+
+** License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have an Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/licenses/gpl.html>
+
+ <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
+
+** SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+** Homepages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+** Git
+ <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
+ <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/db_sqltxt.rb;hb=HEAD>
+
+=end
+module SiSU_DbText
+  # Text clean-up helpers that turn marked-up document text into strings for
+  # use in SQL text. Mx (indexed by symbol, interpolated into the regexes
+  # below) and SiSU_Screen are not defined in this file.
+  class Prepare
+    # Returns str with single quotes and backslashes doubled, Mx break markers
+    # replaced by "<br>\n", Mx tag spans removed, png/jpg image links rewritten
+    # as "[image: file] caption" and other links reduced to their link text.
+    def special_character_escape(str)
+      str.gsub(/'/m,"''"). # ' becomes ''
+        gsub(/(\\)/m,'\1\1'). # \ becomes \\ (original note: ok but with warnings)
+        gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/m,"<br>\n").
+        gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/m,''). #check
+        gsub(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/m,'[image: \1] \2').
+        gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/m,'\1\2').
+        gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/m,'\1')
+    end
+    # Builds search text from arr (a String or an Array of Strings): removes
+    # Mx[:fa_*] markers and Mx[:en_a_*]/Mx[:en_b_*] spans, turns <br> into a
+    # space, appends the :en_a span texts, joins with "\n" and escapes it all.
+    def clean_searchable_text_from_document_objects(arr)
+      en=[]
+      arr=[ arr ] if arr.is_a?(String)
+      txt_arr=arr.map do |s|
+        s=s.gsub(/#{Mx[:fa_o]}[a-z]{1,4}#{Mx[:fa_o_c]}/m,'').
+          gsub(/#{Mx[:fa_c_o]}[a-z]{1,4}#{Mx[:fa_c]}/m,'').
+          gsub(/<br>/m,' ')
+        en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m)
+        s.gsub(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/m,'').
+          gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,'').
+          gsub(/ \s+/m,' ')
+      end
+      special_character_escape((txt_arr << en).flatten.join("\n"))
+    end
+    # Builds body text from arr (a String or an Array of Strings): Mx[:en_a_*]
+    # spans starting with a number N become <sup>N</sup>, Mx[:en_b_*] spans are
+    # removed, and the captured :en_a texts are appended with their leading
+    # number wrapped in <sup>; parts are joined with "\n<br>" and escaped.
+    def clean_document_objects_body(arr)
+      en=[]
+      arr=[ arr ] if arr.is_a?(String)
+      txt_arr=arr.map do |s|
+        en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m)
+        s.gsub(/#{Mx[:en_a_o]}\s*(\d+).+?#{Mx[:en_a_c]}/m,'<sup>\1</sup>').
+          gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,'').
+          gsub(/ \s+/m,' ')
+      end
+      en_arr=en.flatten.map { |e| e.sub(/^(\d+)\s*/,'<sup>\1</sup> ') }
+      special_character_escape((txt_arr << en_arr).flatten.join("\n<br>"))
+    end
+    # Builds search text from source markup; arr is an Array of lines or a
+    # String (split on newlines). Per element it unwraps *{..}* /{..}/ _{..}_
+    # -{..}-, drops block/group/poem/code open and close tags, "@name: value"
+    # elements, leading "_1"/"_*"/"1~"/":A~" style prefixes, "%" lines, ~{..}~.
+    # NOTE(review): @md is not assigned in this class; it must be set before a
+    # line starting ":A~" is processed. Fix: lines are collected with map; the
+    # original used each and joined the untouched input, losing the clean-up.
+    def clean_searchable_text_from_document_source(arr)
+      arr=arr.split(/\n+/m) if arr.is_a?(String)
+      txt_arr=arr.map do |s|
+        s=s.gsub(/([*\/_-])\{(.+?)\}\1/m,'\2').
+          gsub(/^(?:block|group|poem|code)\{/m,'').
+          gsub(/^\}(?:block|group|poem|code)/m,'').
+          gsub(/\A(?:@\S+:\s+.+)\Z/m,'')
+        if s =~/^:A~/ # fill @author/@title placeholders from @md, or warn
+          if defined? @md.creator \
+          and defined? @md.creator.author \
+          and not @md.creator.author.empty?
+            s=s.gsub(/@author/,@md.creator.author)
+          else
+            SiSU_Screen::Ansi.new('v',
+              'WARNING Document Author information missing; provide @creator: :author:',
+              @md.fnb).warn unless @md.opt.act[:quiet][:set]==:on
+          end
+          if defined? @md.title \
+          and defined? @md.title.full \
+          and not @md.title.full.empty?
+            s=s.gsub(/@title/,@md.title.full)
+          else
+            SiSU_Screen::Ansi.new('v',
+              'WARNING Document Title missing; provide @title:',
+              @md.fnb).warn unless @md.opt.act[:quiet][:set]==:on
+          end
+        end
+        s=s.gsub(/^(?:_[1-9]\*?|_\*)\s+/m,'').
+          gsub(/^(?:[1-9]\~(\S+)?)\s+/m,'').
+          gsub(/^(?::?[A-C]\~(\S+)?)\s+/m,'').
+          gsub(/^%{1,3} .+/m,''). #removed even if contained in code block
+          gsub(/<br>/m,' ')
+        s.gsub(/~\{.+?\}~/m,'').
+          gsub(/ \s+/m,' ')
+      end
+      special_character_escape(txt_arr.flatten.join("\n"))
+    end
+    # Returns str with Mx superscript markers turned into "[N]", nbsp and table
+    # cell markers turned into spaces, <..> tags removed, image links replaced
+    # by "[image]", runs of whitespace collapsed and the ends stripped.
+    def strip_markup(str) #define rules, make same as in dal clean
+      str.gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]').
+        gsub(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ').
+        gsub(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1'). #tables
+        gsub(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' '). #tables
+        gsub(/#{Mx[:tc_p]}/u,' '). #tables tidy later
+        gsub(/<.+?>/,'').
+        gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+ /,' [image] '). # fixed: was "\/\/:", which cannot match file:// or ftp://
+        gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]'). # else image names found in search
+        gsub(/\s\s+/,' ').
+        strip
+    end
+    # Returns the distinct runs of 2+ characters (letters, digits, backslash,
+    # slash, underscore or hyphen) found in str, sorted and joined by spaces.
+    def unique_words(str)
+      str.scan(/[a-zA-Z0-9\\\/_-]{2,}/).uniq.sort.join(' ')
+    end
+  end
+end
+__END__