diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2007-06-02 11:27:06 +0100 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2007-06-02 11:27:06 +0100 |
commit | 26767cc88c0548ad7978021796d0ccc4c9f7ffed (patch) | |
tree | fe225e99e180b5d2925cbf776826f74db27e1888 /lib/sisu/v0/shared_txt.rb | |
parent | restrict use to ruby1.8 branch, i.e. < 1.9 (diff) |
0.53.0, pre-build, see changelog, library naming changed for scm, placed under v0 (instead of 0.53) upstream/0.53.0 sisu_0.53.0
Diffstat (limited to 'lib/sisu/v0/shared_txt.rb')
-rw-r--r-- | lib/sisu/v0/shared_txt.rb | 299 |
1 files changed, 299 insertions, 0 deletions
# lib/sisu/v0/shared_txt.rb (new file in commit 26767cc8, blob ddf2b275)
=begin
 * Name: SiSU information Structuring Universe - Structured information, Serialized Units
 * Author: Ralph Amissah
   * http://www.jus.uio.no/sisu
   * http://www.jus.uio.no/sisu/SiSU/download.html

 * Description: modules shared by flatfile output generators

 * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah

 * License: GPL 2 or later

   Summary of GPL 2

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
     http://www.fsf.org/licenses/gpl.html
     http://www.gnu.org/copyleft/gpl.html
     http://www.jus.uio.no/sisu/gpl2.fsf

   SiSU was first released to the public on January 4th 2005

   SiSU uses:

   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

   © Ralph Amissah 1997, current 2007.
   All Rights Reserved.

 * Ralph Amissah: ralph@amissah.com
   ralph.amissah@gmail.com
=end
module SiSU_text_utils
  # Greedy word-wrapper for flat text output.
  class Wrap
    # para::       text to wrap (any whitespace separates words)
    # n_char_max:: maximum line width, counted including the indent
    # n_indent::   number of spaces put in front of every line after the first
    # n_hang::     number of spaces put in front of the first line;
    #              defaults to n_indent when not given
    def initialize(para='',n_char_max=76,n_indent=0,n_hang=nil)
      @para,@n_char_max,@n_indent=para,n_char_max,n_indent
      @br="\n"
      @n_hang=n_hang || @n_indent
    end
    # Returns the paragraph re-flowed so that each line holds as many words as
    # fit in (n_char_max - n_indent) characters. A single word longer than
    # that width is placed on a line of its own rather than being split.
    # An empty (or nil) paragraph yields just the hanging indent.
    def line_wrap
      space=' '
      spaces_indent,spaces_hang="#{@br}#{space*@n_indent}",space*@n_hang
      width=@n_char_max - @n_indent
      lines=[]
      @para.to_s.scan(/\S+/).each do |word|
        # start a new line when there is none yet, or when appending
        # " word" to the current one would exceed the available width
        if lines.empty? || (lines.last.length + 1 + word.length) > width
          lines << word.dup
        else
          lines.last << space << word
        end
      end
      "#{spaces_hang}#{lines.join(spaces_indent)}"
    end
    # Wraps with both the first-line and continuation indent set to 2.
    def line_wrap_indent1
      @n_indent,@n_hang=2,2
      line_wrap
    end
    # Wraps with a first-line indent of 2 and a continuation indent of 4.
    def line_wrap_endnote
      @n_indent,@n_hang=4,2
      line_wrap
    end
  end
  # Recognises document header lines, in either the "0~name value" or the
  # "@name: value" form, and exposes the recognised header through accessors.
  class Header_scan
    # Header rules, tried in this order. Each entry is:
    #   [name pattern, canonical tag (nil = use the name as matched), type, attrib]
    # The same table serves both header syntaxes so they cannot drift apart.
    HEADER_RULES=[
      ['title',                      nil,             'meta',   'dc'],       #dc 1 (content is taken from md.dc_title)
      ['creator|author',             'creator',       'meta',   'dc'],       #dc 2
      ['subject',                    nil,             'meta',   'dc'],       #dc 3
      ['description',                nil,             'meta',   'dc'],       #dc 4
      ['publisher',                  nil,             'meta',   'dc'],       #dc 5
      ['contributor',                nil,             'meta',   'dc'],       #dc 6
      ['date',                       nil,             'meta',   'dc'],       #dc 7
      ['date\.created',              nil,             'meta',   'extra'],
      ['date\.issued',               nil,             'meta',   'extra'],
      ['date\.available',            nil,             'meta',   'extra'],
      ['date\.valid',                nil,             'meta',   'extra'],
      ['date\.modified',             nil,             'meta',   'extra'],
      ['type',                       nil,             'meta',   'dc'],       #dc 8
      ['format',                     nil,             'meta',   'dc'],       #dc 9
      ['identifier',                 nil,             'meta',   'dc'],       #dc 10
      ['source',                     nil,             'meta',   'dc'],       #dc 11
      ['language',                   nil,             'meta',   'dc'],       #dc 12
      ['relation',                   nil,             'meta',   'dc'],       #dc 13
      ['coverage',                   nil,             'meta',   'dc'],       #dc 14
      ['rights',                     nil,             'meta',   'dc'],       #dc 15
      ['keywords',                   nil,             'meta',   'extra'],
      ['copyright',                  nil,             'meta',   'extra'],
      ['translator|translated_by',   'translator',    'meta',   'extra'],
      ['illustrator|illustrated_by', 'illustrator',   'meta',   'extra'],
      ['prepared_by',                nil,             'meta',   'extra'],
      ['digitized_by',               nil,             'meta',   'extra'],
      ['comments?',                  nil,             'meta',   'extra'],
      ['abstract',                   nil,             'meta',   'extra'],
      ['tags?',                      nil,             'meta',   'extra'],
      ['catalogue',                  nil,             'meta',   'extra'],
      ['class(?:ify)?_loc',          'classify_loc',  'meta',   'extra'],
      ['class(?:ify)?_dewey',        'classify_dewey','meta',   'extra'],
      ['class(?:ify)?_pg',           'classify_pg',   'meta',   'extra'],
      ['class(?:ify)?_isbn',         'classify_isbn', 'meta',   'extra'],
      ['toc|structure',              'structure',     'process','instruct'],
      ['level|page|markup',          'markup',        'process','instruct'],
      ['bold',                       nil,             'process','instruct'],
      ['italics|itali[sz]e',         'italicize',     'process','instruct'],
      ['vocabulary|wordlist',        'vocabulary',    'process','instruct'],
      ['skin',                       nil,             'process','instruct'],
      ['css|stylesheet',             'css',           'process','instruct'],
      ['links',                      nil,             'process','instruct'],
      ['prefix',                     nil,             'process','instruct'], #add a & b
      ['suffix',                     nil,             'process','instruct'],
      ['information',                nil,             'process','instruct'],
      ['contact',                    nil,             'process','instruct'],
      ['rcs|cvs',                    'version',       'process','instruct']
    ].freeze
    # Compiled rules for the "0~name value" header form.
    RULES_ZERO=HEADER_RULES.map { |name,tag,type,attrib| [/^0~(#{name})\s+(.+?)$/,tag,type,attrib] }.freeze
    # Compiled rules for the "@name: value" header form.
    RULES_AT=HEADER_RULES.map { |name,tag,type,attrib| [/^@(#{name}):\s+(.+?)$/,tag,type,attrib] }.freeze
    # md::   metadata object; only #dc_title is called on it here
    # para:: the text to scan; object-number/digest markers are removed first
    def initialize(md,para)
      @regxcl=/<~\d+;\w\d+;\w\d+><(?:[0-9a-f]{32}|[0-9a-f]{64}):(?:[0-9a-f]{32}|[0-9a-f]{64})>/
      @md,@p=md,para.gsub(@regxcl,'')
    end
    # Returns [tag, tag_content, {tag=>tag_content}] when both are given,
    # otherwise nil. type and attrib are accepted but not used.
    # NOTE(review): this previously read the undefined names dc_tag/dc_content
    # and so always raised NameError; using the method's own parameters is the
    # presumed intent - confirm against callers.
    def extract(tag,tag_content,type,attrib)
      if tag and tag_content
        [tag,tag_content,{tag=>tag_content}]
      else nil
      end
    end
    # Records one recognised header on this object and returns self, so the
    # result can be queried through the accessors below.
    def header(tag,tag_content,type='',attrib='')
      @tag,@tag_content,@type,@attrib=tag,tag_content,type,attrib
      self
    end
    # Accessors for the last header recorded by #header (nil before that).
    # label, element and el all return the tag; text and info both return the
    # content; attribute and attrib both return the attribute.
    def label #element
      @tag
    end
    def type
      @type
    end
    def text
      @tag_content
    end
    def info #element text
      @tag_content
    end
    def attribute
      @attrib
    end
    def element
      @tag
    end
    def attrib
      @attrib
    end
    def el
      @tag
    end
    # Tries the "0~name value" rules; returns self on a match, else nil.
    def start_is_zero
      scan_rules(RULES_ZERO)
    end
    # Tries the "@name: value" rules; returns self on a match, else nil.
    def start_is_at
      scan_rules(RULES_AT)
    end
    # Dispatches on the header form found in the text; returns self when a
    # known header is recognised, else nil. Same behaviour as #meta.
    def dublin
      scan_header
    end
    # Dispatches on the header form found in the text; returns self when a
    # known header is recognised, else nil. Same behaviour as #dublin.
    def meta
      scan_header
    end
    private
    # Chooses the rule set from the shape of the text.
    def scan_header
      if @p =~/^0~\S+\s/; start_is_zero
      elsif @p =~/^@\S+:[+-]?\s/; start_is_at
      else nil
      end
    end
    # Applies the first matching rule, in table order.
    def scan_rules(rules)
      rules.each do |regx,tag,type,attrib|
        m=regx.match(@p)
        next unless m
        # the title text comes from the metadata object, not from the line
        content=(m[1]=='title') ? @md.dc_title : m[2]
        return header(tag || m[1],content,type,attrib)
      end
      nil
    end
  end
end
module SiSU_text_parts_flatfile
  # Splits one marked-up paragraph into its format code, text and object
  # number, and wraps the result in a Format::ParaSiSU.
  class Split_text_object
    @@dl=nil          # digest length, looked up once on first instantiation
    @@alt_id_count=0  # counter for the "x<n>" ids (was never initialised)
    attr_reader :format,:text,:ocn,:lev_para_ocn
    # md is passed through to Format::ParaSiSU; para is the text to split.
    def initialize(md,para)
      @md,@para=md,para
      @format,@ocn='null','null'
      #@format,@ocn=nil,nil
      @@dl ||=SiSU_Env::Info_env.new.digest.length
    end
    # Fills in format, text and ocn from the paragraph, builds lev_para_ocn
    # and returns self.
    def lev_segname_para_ocn
      # remainder of an object marker after "<~NUMBER": two ";xN" fields and
      # a pair of hex digests of the configured length
      tail=/;(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/
      if @para =~/^(\d~|<:.+?>).+?<~(\d+)#{tail}.*/
        if (m=/^([1-6])~(\S+)\s+(\S.+?)<~(\d+)#{tail}/m.match(@para))
          @format,@text,@ocn="#{m[1]}~#{m[2]}",m[3],m[4]
        elsif (m=/^([1-6]~)\s+(\S.+?)<~(\d+)#{tail}/m.match(@para))
          @format,@text,@ocn=m[1],m[2],m[3]
        elsif (m=/<:(.+?)>\s*(\S.+?)<~(\d+)#{tail}/m.match(@para))
          @format,@text,@ocn=m[1],m[2],m[3]
        # NOTE(review): the two branches below look unreachable - a "<~0;"
        # marker is already matched by the "<~(\d+);" branches above. Order
        # kept as in the original; confirm whether they were meant to come
        # first.
        elsif (m=/^([1-6])~(\S+)\s+(\S.+?)<~(0)#{tail}/m.match(@para))
          @@alt_id_count+=1
          @format,@text,@ocn="#{m[1]}~#{m[2]}",m[3],"x#{@@alt_id_count}"
        elsif (m=/^([1-6]~)\s+(\S.+?)<~(0)#{tail}/m.match(@para))
          @@alt_id_count+=1
          @format,@text,@ocn=m[1],m[2],"x#{@@alt_id_count}"
        end
      else
        if (m=/(.+?)<~(\d+)#{tail}/m.match(@para))
          @text,@ocn=m[1],m[2]
        end
        if @para !~/<~(\d+)#{tail}|^$/ #added 2002w06
          # no marker and no empty line: take the whole paragraph (the former
          # lazy /(.+?)/ captured only its first character)
          @text=/(.+)/m.match(@para)[1]
        end
        if (m=/^(\d)~\S*\s+(.+)/m.match(@para))
          @format,@text=m[1],m[2]
        end
      end
      ocn=(@para =~/.+<~\d+>/) ? @ocn : '<~0>' #hmmm, watch
      @lev_para_ocn=Format::ParaSiSU.new(@md,@format,@text,ocn)
      self
    end
  end
end