about | summary | refs | log | tree | commit | diff | homepage
path: root/lib/sisu/0.52/shared_txt.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/0.52/shared_txt.rb')
-rw-r--r-- lib/sisu/0.52/shared_txt.rb 299
1 files changed, 299 insertions, 0 deletions
diff --git a/lib/sisu/0.52/shared_txt.rb b/lib/sisu/0.52/shared_txt.rb
new file mode 100644
index 00000000..bd0b41cc
--- /dev/null
+++ b/lib/sisu/0.52/shared_txt.rb
@@ -0,0 +1,299 @@
+=begin
+ * Name: SiSU information Structuring Universe - Structured information, Serialized Units
+ * Author: Ralph Amissah
+ * http://www.jus.uio.no/sisu
+ * http://www.jus.uio.no/sisu/SiSU/download.html
+
+ * Description: modules shared by flatfile output generators
+
+ * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah
+
+ * License: GPL 2 or later
+
+ Summary of GPL 2
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ http://www.fsf.org/licenses/gpl.html
+ http://www.gnu.org/copyleft/gpl.html
+ http://www.jus.uio.no/sisu/gpl2.fsf
+
+ SiSU was first released to the public on January 4th 2005
+
+ SiSU uses:
+
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ © Ralph Amissah 1997, current 2007.
+ All Rights Reserved.
+
+ * Ralph Amissah: ralph@amissah.com
+ ralph.amissah@gmail.com
+=end
+module SiSU_text_utils
# Word-wraps a paragraph of text to a maximum line width, with optional
# indentation of continuation lines and a separate "hanging" indent for
# the first line.
#
# para       - text to wrap; any run of whitespace separates words
#              (nil is treated as an empty paragraph)
# n_char_max - maximum line width, indentation included (default 76)
# n_indent   - spaces placed after every line break (default 0)
# n_hang     - spaces placed before the first line (defaults to n_indent)
class Wrap
  def initialize(para='', n_char_max=76, n_indent=0, n_hang=nil)
    @para, @n_char_max, @n_indent = para, n_char_max, n_indent
    @br = "\n"
    @n_hang = n_hang || @n_indent
  end

  # Returns the wrapped paragraph as a single String.
  #
  # Words are packed greedily: a word starts a new line when appending it
  # (plus one separating space) would make the line longer than
  # n_char_max - n_indent. A single word longer than that width is kept
  # whole on its own line. Lines are joined with "\n" followed by n_indent
  # spaces; the first line is prefixed with n_hang spaces.
  def line_wrap
    width = @n_char_max - @n_indent
    lines = []
    current = nil
    @para.to_s.scan(/\S+/).each do |word|
      if current.nil?
        current = word
      elsif current.length + 1 + word.length > width
        lines << current
        current = word
      else
        current = "#{current} #{word}"
      end
    end
    lines << (current || '') # an empty paragraph still yields one (empty) line

    line_break = "#{@br}#{' ' * @n_indent}"
    hang = ' ' * @n_hang
    hang + lines.join(line_break).sub(/\A\n+/, '')
  end

  # Wraps with a fixed indent of 2 and hang of 2.
  def line_wrap_indent1
    @n_indent, @n_hang = 2, 2
    line_wrap
  end

  # Wraps with a fixed indent of 4 and hang of 2 (endnote layout).
  def line_wrap_endnote
    @n_indent, @n_hang = 4, 2
    line_wrap
  end
end
+#end
+#module SiSU_scan
# Recognises SiSU document header lines in either of the two syntaxes
# handled here:
#
#   0~name content      (see start_is_zero)
#   @name: content      (see start_is_at)
#
# and exposes the recognised header through a set of reader methods
# (label/element/el, text/info, type, attribute/attrib).
#
# md   - object that must respond to #dc_title; its value is used as the
#        content of a "title" header in place of the text on the line
# para - the paragraph (String) to examine
class Header_scan
  # Trailing marker of the form <~N;xN;xN><hex:hex> (32 or 64 hex digits
  # each side) that is removed from the paragraph before matching.
  MARKER = /<~\d+;\w\d+;\w\d+><(?:[0-9a-f]{32}|[0-9a-f]{64}):(?:[0-9a-f]{32}|[0-9a-f]{64})>/

  # One row per recognised header:
  #   [name pattern, label (nil = use the name as written), type, attrib]
  # The name pattern is interpolated into a capturing group, so it must
  # not contain capturing groups of its own.
  FIELDS = [
    ['title',                       nil,              'meta',    'dc'],       # dc 1
    ['creator|author',              'creator',        'meta',    'dc'],       # dc 2
    ['subject',                     nil,              'meta',    'dc'],       # dc 3
    ['description',                 nil,              'meta',    'dc'],       # dc 4
    ['publisher',                   nil,              'meta',    'dc'],       # dc 5
    ['contributor',                 nil,              'meta',    'dc'],       # dc 6
    ['date',                        nil,              'meta',    'dc'],       # dc 7
    ['date\.created',               nil,              'meta',    'extra'],
    ['date\.issued',                nil,              'meta',    'extra'],
    ['date\.available',             nil,              'meta',    'extra'],
    ['date\.valid',                 nil,              'meta',    'extra'],
    ['date\.modified',              nil,              'meta',    'extra'],
    ['type',                        nil,              'meta',    'dc'],       # dc 8
    ['format',                      nil,              'meta',    'dc'],       # dc 9
    ['identifier',                  nil,              'meta',    'dc'],       # dc 10
    ['source',                      nil,              'meta',    'dc'],       # dc 11
    ['language',                    nil,              'meta',    'dc'],       # dc 12
    ['relation',                    nil,              'meta',    'dc'],       # dc 13
    ['coverage',                    nil,              'meta',    'dc'],       # dc 14
    ['rights',                      nil,              'meta',    'dc'],       # dc 15
    ['keywords',                    nil,              'meta',    'extra'],
    ['copyright',                   nil,              'meta',    'extra'],
    ['translator|translated_by',    'translator',     'meta',    'extra'],
    ['illustrator|illustrated_by',  'illustrator',    'meta',    'extra'],
    ['prepared_by',                 nil,              'meta',    'extra'],
    ['digitized_by',                nil,              'meta',    'extra'],
    ['comments?',                   nil,              'meta',    'extra'],
    ['abstract',                    nil,              'meta',    'extra'],
    ['tags?',                       nil,              'meta',    'extra'],
    ['catalogue',                   nil,              'meta',    'extra'],
    ['class(?:ify)?_loc',           'classify_loc',   'meta',    'extra'],
    ['class(?:ify)?_dewey',         'classify_dewey', 'meta',    'extra'],
    ['class(?:ify)?_pg',            'classify_pg',    'meta',    'extra'],
    ['class(?:ify)?_isbn',          'classify_isbn',  'meta',    'extra'],
    ['toc|structure',               'structure',      'process', 'instruct'],
    ['level|page|markup',           'markup',         'process', 'instruct'],
    ['bold',                        nil,              'process', 'instruct'],
    ['italics|itali[sz]e',          'italicize',      'process', 'instruct'],
    ['vocabulary|wordlist',         'vocabulary',     'process', 'instruct'],
    ['skin',                        nil,              'process', 'instruct'],
    ['css|stylesheet',              'css',            'process', 'instruct'],
    ['links',                       nil,              'process', 'instruct'],
    ['prefix',                      nil,              'process', 'instruct'],
    ['suffix',                      nil,              'process', 'instruct'],
    ['information',                 nil,              'process', 'instruct'],
    ['contact',                     nil,              'process', 'instruct'],
    ['rcs|cvs',                     'version',        'process', 'instruct']
  ].freeze

  # Compiled rules for the "0~name content" syntax.
  ZERO_RULES = FIELDS.map { |names, label, type, attrib|
    [/^0~(#{names})\s+(.+?)$/, label, type, attrib]
  }.freeze

  # Compiled rules for the "@name: content" syntax.
  AT_RULES = FIELDS.map { |names, label, type, attrib|
    [/^@(#{names}):\s+(.+?)$/, label, type, attrib]
  }.freeze

  def initialize(md, para)
    @regxcl = MARKER
    @md, @p = md, para.gsub(@regxcl, '')
  end

  # Returns [tag, tag_content, {tag => tag_content}] when both tag and
  # tag_content are set, otherwise nil. type and attrib are accepted for
  # interface compatibility and are not used.
  def extract(tag, tag_content, type, attrib)
    return nil unless tag and tag_content
    [tag, tag_content, { tag => tag_content }]
  end

  # Records a recognised header on this object and returns self, so that
  # the reader methods below can be called on the result.
  def header(tag, tag_content, type='', attrib='')
    @tag, @tag_content, @type, @attrib = tag, tag_content, type, attrib
    self
  end

  # Header name (normalised label); nil until a header has been recorded.
  def label
    @tag
  end
  alias_method :element, :label
  alias_method :el, :label

  # Header category as passed to #header ('meta' or 'process' for the
  # rules in FIELDS).
  def type
    @type
  end

  # Header content.
  def text
    @tag_content
  end
  alias_method :info, :text

  # Header attribute as passed to #header ('dc', 'extra' or 'instruct'
  # for the rules in FIELDS).
  def attribute
    @attrib
  end
  alias_method :attrib, :attribute

  # Matches the paragraph against the "0~name content" rules.
  # Returns self with the header recorded, or nil when no rule matches.
  def start_is_zero
    scan_rules(ZERO_RULES)
  end

  # Matches the paragraph against the "@name: content" rules.
  # Returns self with the header recorded, or nil when no rule matches.
  def start_is_at
    scan_rules(AT_RULES)
  end

  # Dispatches on the leading syntax of the paragraph and returns the
  # result of start_is_zero / start_is_at, or nil when the paragraph does
  # not look like a header line.
  def meta
    if @p =~ /^0~\S+\s/ then start_is_zero
    elsif @p =~ /^@\S+:[+-]?\s/ then start_is_at
    end
  end

  # Same behaviour as #meta.
  def dublin
    meta
  end

  private

  # Tries each rule in order; on the first match records the header and
  # returns self. Returns nil when nothing matches.
  def scan_rules(rules)
    rules.each do |regex, label, type, attrib|
      m = regex.match(@p)
      next unless m
      # The title comes from the metadata object, not from the matched line.
      content = (m[1] == 'title') ? @md.dc_title : m[2]
      return header(label || m[1], content, type, attrib)
    end
    nil
  end
end
+end
+module SiSU_text_parts_flatfile
# Splits one paragraph into its format code, text and object number by
# matching the trailing marker of the form <~N;..;..><hex:hex>, then wraps
# the pieces in a Format::ParaSiSU object.
#
# md   - metadata object, passed through unchanged to Format::ParaSiSU
# para - the paragraph (String) to split
#
# Readers: format, text, ocn and lev_para_ocn (the Format::ParaSiSU
# instance built by #lev_segname_para_ocn).
class Split_text_object
  # Length of each hex field in the marker; read once from
  # SiSU_Env::Info_env.new.digest.length and shared by all instances.
  @@dl=nil
  # Counter used to build the "xN" substitute ids; must exist before the
  # `+= 1` below can run (it was previously never initialised).
  @@alt_id_count=0
  attr_reader :format,:text,:ocn,:lev_para_ocn

  def initialize(md,para)
    @md,@para=md,para
    @format,@ocn='null','null'
    #@format,@ocn=nil,nil
    @@dl ||=SiSU_Env::Info_env.new.digest.length
  end

  # Parses @para, setting @format, @text and @ocn where a pattern
  # matches, builds @lev_para_ocn and returns self.
  def lev_segname_para_ocn
    if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>.*/
      # Paragraph starts with "N~" or "<:...>" and carries a marker.
      if /^([1-6])~(\S+)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
        # "N~segname text": format becomes "N~segname".
        @format,segname,@text,@ocn=$1,$2,$3,$4
        @format="#@format~#{segname}" #
      elsif /^([1-6]~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
        # "N~ text": format is "N~".
        @format,@text,@ocn=$1,$2,$3 #,$4
      elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
        # "<:code> text": format is the code between "<:" and ">".
        @format,@text,@ocn=$1,$2,$3
      # NOTE(review): the two branches below differ from the first two only
      # in requiring the number to be 0, which \d+ above already accepts, so
      # as ordered they look unreachable -- confirm intended precedence.
      elsif /^([1-6])~(\S+)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
        @@alt_id_count+=1
        @format,segname,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}"
        @format="#@format~#{segname}" #
      elsif /^([1-6]~)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
        @@alt_id_count+=1
        @format,@text,@ocn=$1,$2,"x#{@@alt_id_count}"
      end
    else
      # Any other paragraph: take text and number from the marker if present.
      if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
        @text,@ocn=$1,$2
      end
      if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>|^$/ #added 2002w06
        # NOTE(review): the lazy (.+?) with nothing after it captures a
        # single character only -- confirm whether the whole paragraph
        # was intended.
        @text=/(.+?)/m.match(@para)[1]
      end
      if /^(\d)~\S*\s+(.+)/m.match(@para)
        # Heading-style line without a marker: this overrides @text set above.
        @format,@text=$1,$2
      end
    end
    # NOTE(review): this tests for "<~N>" with nothing between the digits
    # and ">", unlike the ";"-separated markers matched above -- confirm
    # which inputs are expected to take the first branch.
    @lev_para_ocn=if @para =~/.+<~\d+>/ #hmmm, watch
      Format::ParaSiSU.new(@md,@format,@text,@ocn)
    else Format::ParaSiSU.new(@md,@format,@text,'<~0>')
    end
    self
  end
end
+end
+__END__
+