aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v2/db_sqltxt.rb
blob: f120b95f3a99103a1669150207a5341784eab351 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# coding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search
   #___#

 * Author: Ralph Amissah

 * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007, 2008, 2009, 2010 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/copyleft/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: preparation of document text (quote escaping, markup
    stripping, searchable plaintext) for the SQL database modules

=end
module SiSU_DB_text
  # Text clean-up helpers used when preparing document text for SQL storage
  # and search. All marker strings come from the project-wide Mx table, which
  # is defined outside this file.
  #
  # Every method here edits the string(s) it is given IN PLACE (gsub!) and
  # also returns the result, so callers may rely on either.
  class Prepare
    # Escapes a string for use inside a single-quoted SQL literal and reduces
    # internal link/line-break markers to plain text.
    #
    # str - String to process (mutated in place).
    #
    # Returns the same String object.
    def special_character_escape(str)
      str.gsub!(/'/,"''") # SQL-style quoting: double every single quote
      str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n")
      str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
      # image link -> "[image: name] caption"; an optional WxH size is dropped
      str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2')
      # file:// or ftp:// link -> link text, keeping trailing punctuation/space
      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
      # link with a marked-up url -> link text only
      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1')
      str
    end

    # Produces clean, searchable plaintext from document source lines.
    #
    # arr - collection of String source chunks; each one is mutated in place.
    #
    # Endnote bodies ( ~{ ... }~ ) are pulled out of the running text and
    # appended after it. The pieces are joined with "\n" and then passed
    # through special_character_escape.
    #
    # NOTE(review): chunks starting with ":A~" read @md (creator.author and
    # title.full). @md is not assigned anywhere in this class, so it is
    # presumably supplied by a subclass or the caller -- confirm.
    #
    # Returns a new String.
    def clean_searchable_text(arr)
      en=[]
      arr.each do |s|
        s.gsub!(/([*\/_-])\{(.+?)\}\1/,'\2') # inline emphasis: keep the text only
        s.gsub!(/^(?:group|poem|code)\{/,''); s.gsub!(/^\}(?:group|poem|code)/,'')
        s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'') # a chunk that is a "@key: value" header is blanked
        if s =~/^:A~/
          s.gsub!(/@author/,@md.creator.author)
          s.gsub!(/@title/,@md.title.full)
        end
        s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/,'') # indent / bullet markers
        s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/,'') # numbered heading markers
        s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/,'') # lettered heading markers
        s.gsub!(/^%{1,3} .+/,'') #removed even if contained in code block
        s.gsub!(/<br>/,' ')
        en << s.scan(/~\{\s*(.+?)\s*\}~/) # collect endnote text before removing it
        s.gsub!(/~\{.+?\}~/,'')
        s.gsub!(/ \s+/,' ')
      end
      # body chunks first, then all endnote texts (nested scan results flattened)
      txt=[arr,en].flatten.join("\n")
      special_character_escape(txt)
    end

    # Strips internal markup from a string so that only searchable text is left.
    # (define rules, make same as in dal clean)
    #
    # str - String to process (mutated in place).
    #
    # Returns the same String object, whitespace-collapsed and stripped.
    def strip_markup(str)
      str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]')
      str.gsub!(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ')
      str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1')         #tables
      str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ')                          #tables
      str.gsub!(/#{Mx[:tc_p]}/u,' ')                                                     #tables tidy later
      str.gsub!(/<.+?>/,'')
      # Image links are replaced by a placeholder, else image names are found in
      # search. The scheme separator must be "://" (as in special_character_escape);
      # with "//:" this rule could never match a file:// or ftp:// target.
      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+ /,' [image] ')
      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]')
      str.gsub!(/\s\s+/,' ')
      str.strip!
      str
    end
  end
end
__END__