# coding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

   metadata harvest, extract topics and associated writings from document set
   (topics use topic_register header)

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:

 * Download:

 * Ralph Amissah

 ** Description: simple xml representation (sax style)

=end
module HARVEST_topics
  require "#{SiSU_lib}/author_format"

  # Entry point: reads the header paragraphs of each .sst/.ssm file given on
  # the command line, builds a topic index from their @topic_register: headers
  # and writes it out as harvest_topics.html.
  class Songsheet
    # Topic tree handed to Index. It has to be initialised here: class
    # variables belong to the class that defines them, so the assignment made
    # inside Index is not visible from Songsheet.
    @@the_idx_topics={}

    # opt:: option object; this class reads opt.files and opt.cmd.
    def initialize(opt)
      @opt=opt
      @file_list=opt.files
      @env=SiSU_Env::Info_env.new
    end

    # Harvest every source file, build the topic index, write the html output
    # and print the location(s) of the generated file.
    def songsheet
      idx_array=[]
      source_files.each do |filename|
        idx_array=HARVEST_topics::Harvest.new(header_paragraphs(filename),filename,idx_array).extract_harvest
      end
      the_idx=HARVEST_topics::Index.new(idx_array,@@the_idx_topics).construct_book_topic_index
      #HARVEST_topics::Output_index.new(the_idx).screen_print.cycle
      HARVEST_topics::Output_index.new(@opt,the_idx).html_print.html_songsheet
      puts "file://#{@env.path.output_md_harvest}/harvest_topics.html"
      puts "file://#{@env.path.pwd}/harvest_topics.html" if @opt.cmd.inspect =~/-M/
    end

    private

    # Names from the file list that end in .sst or .ssm; anything else is
    # reported on stdout and skipped.
    def source_files
      files=[]
      @file_list.each do |f|
        if f =~/.+?\.ss[tm]$/
          files << f[/(.+?\.ss[tm])$/,1]
        else
          print "not .sst or .ssm ? \n<< #{f} >> "
        end
      end
      files
    end

    # Leading "@name: value" paragraphs of +filename+. The file is read one
    # blank-line separated paragraph at a time; blank and "% " comment
    # paragraphs are skipped, and reading stops at the first paragraph that is
    # neither a header nor a comment.
    def header_paragraphs(filename)
      paragraphs=[]
      File.open(filename,'r') do |file|
        file.each_line("\n\n") do |para|
          if para =~/^@\S+?: /
            paragraphs << para
          elsif para !~/^(?:\s*\n|%+ )/
            break
          end
        end
      end
      paragraphs
    end
  end

  # Extracts title, author and topic register from one document's header
  # paragraphs and appends one index entry per topic to the shared array.
  class Harvest
    HEADER_RGX={
      :author   => /^@(?:author|creator):\s+(.+)/,
      :title    => /^@title:\s+(.+)/,
      :subtitle => /^@subtitle:\s+(.+)/,
      :idx      => /^@topic_register:\s+(.+)/
    }
    LANG_SUFFIX_RGX=/~([a-z]{2,3})\.ss[mt]$/

    # data::      array of header paragraphs (strings)
    # filename::  source file the paragraphs came from
    # idx_array:: array of index entries collected so far (appended to in place)
    def initialize(data,filename,idx_array)
      @data,@filename,@idx_array=data,filename,idx_array
    end

    # Returns idx_array with one hash per ";"-separated topic register entry of
    # this document (keys :filename, :file, :rough_idx, :title, :author, :page).
    # When title, author or topic register is missing nothing is added and a
    # diagnostic is printed instead.
    def extract_harvest
      @idx_list,@title,@subtitle,@fulltitle,@author_format=nil,nil,nil,nil,nil
      @data.each do |para|
        @idx_list=header_value(:idx,para) || @idx_list
        @title=header_value(:title,para) || @title
        @subtitle=header_value(:subtitle,para) || @subtitle
        @author_format=header_value(:author,para) || @author_format
        # stop early once every field of interest has been seen
        break if @title and @subtitle and @author_format and @idx_list
      end
      @fulltitle=(@title and @subtitle) ? (@title + ' - ' + @subtitle) : @title
      if @title and @author_format and @idx_list
        creator=FORMAT::Author.new(@author_format.strip).author_details
        @authors,@authorship=creator[:authors],creator[:authorship]
        # "name~xx.sst" carries a language code that ends up in the manifest page name
        lang_match=LANG_SUFFIX_RGX.match(@filename)
        if lang_match
          lang='.' + lang_match[1]
          file=@filename.sub(LANG_SUFFIX_RGX,'')
        else
          lang=''
          file=@filename.sub(/\.ss[mt]$/,'')
        end
        page="sisu_manifest#{lang}.html"
        @idx_list.scan(/[^;]+/).each do |topic|
          topic=topic.strip
          next if topic.empty?
          @idx_array << {
            :filename  => @filename,
            :file      => file,
            :rough_idx => topic,
            :title     => @fulltitle,
            :author    => creator,
            :page      => page
          }
        end
      else
        p "missing author field: #@filename title: #@title; author: #@author_format; idx: #@idx_list"
      end
      @idx_array.flatten!
      @idx_array
    end

    private

    # First capture of the header regex +key+ in +para+, or nil when the
    # paragraph is not that header.
    def header_value(key,para)
      m=HEADER_RGX[key].match(para)
      m ? m[1] : nil
    end
  end

  # Turns the flat list of index entries into a nested hash:
  #   { 'Topic' => { :md => [documents], 'Subtopic' => { :md => [...], ... } } }
  # ":" separates levels in a topic register entry, "|" separates alternative
  # names within a level.
  class Index
    # Levels beyond the fifth are ignored, as in the original level-by-level code.
    MAX_LEVELS=5

    # idx_array:: entries produced by Harvest#extract_harvest
    # the_idx::   hash the topic tree is built into (modified in place)
    def initialize(idx_array,the_idx)
      @idx_array,@the_idx=idx_array,the_idx
    end

    # Upper-cases the first character of +txt+, leaving the rest untouched.
    def capital(txt)
      txt[0,1].capitalize + txt[1..-1].to_s
    end

    # Appends the document described by +idx+ to the array +hash+ (a topic
    # node's :md list).
    def contents(hash,idx)
      names=''
      idx[:author][:last_first_format_a].each do |n|
        # NOTE(review): +s+ is not used by the text as received; presumably it
        # was an anchor name inside markup that has since been lost - confirm.
        s=n.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
        names += %{#{n}, }
      end
      hash << {
        :filename => idx[:filename],
        :file     => idx[:file],
        :author   => names,
        :title    => idx[:title],
        :page     => idx[:page]
      }
    end

    # Files every entry of idx_array under its topic path and returns the
    # topic tree. The document is attached to the node(s) of the last level of
    # its path. When a level lists several alternatives, the following levels
    # are placed under each of them.
    def construct_book_topic_index
      @idx_array.each do |idx|
        unless idx[:rough_idx]
          puts "no topic register in: << #{idx[:filename]} >>"
          next
        end
        levels=idx[:rough_idx].scan(/[^:]+/).map {|lev| lev.scan(/[^|]+/) }
        last=levels.length - 1
        parents=[@the_idx]
        levels.first(MAX_LEVELS).each_with_index do |alternatives,t|
          nodes=[]
          alternatives.each do |name|
            name=name.strip
            next if name.empty?
            name=capital(name)
            parents.each do |parent|
              node=(parent[name] ||= {:md => []})
              contents(node[:md],idx) if t == last
              nodes << node
            end
          end
          parents=nodes
        end
      end
      @the_idx
    end
  end

  # Writes the topic tree, either as html files or to the screen.
  #
  # html_print, screen_print and screen_print_unsorted each (re)define the
  # do_* helper methods for their own output mode and return self, so callers
  # chain e.g. Output_index.new(opt,idx).html_print.html_songsheet.
  #
  # NOTE(review): the string templates in this class contain no markup and
  # leave several values unused (attrib, name, css_path, sv). It looks as if
  # the html tags were stripped from the source as received; the literals
  # below reproduce only the text that is visible - restore from upstream.
  class Output_index
    # Keys that describe a document rather than name a sub-topic.
    META_KEYS=[:title,:author,:filename,:file,:rough_idx,:page]

    # opt::     option object; opt.cmd is checked for -M (maintenance output)
    # the_idx:: topic tree built by Index#construct_book_topic_index
    def initialize(opt,the_idx)
      @opt,@the_idx=opt,the_idx
      @env=SiSU_Env::Info_env.new
      @rc=SiSU_Env::Get_init.instance.yamlrc
      @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
      @letter=@alph.shift
      @vz=SiSU_Env::Get_init.instance.skin
    end

    # Opens the output file, plus a second copy in the working directory when
    # -M was given.
    def html_file_open
      @output={}
      @output[:html]=File.new("#{@env.path.output_md_harvest}/harvest_topics.html",'w')
      if @opt.cmd.inspect =~/-M/
        @output[:html_mnt]=File.new("#{@env.path.pwd}/harvest_topics.html",'w')
      end
    end

    # Closes whichever output files are open.
    def html_file_close
      (@output || {}).each_value do |f|
        f.close if f and not f.closed?
      end
    end

    # Selects html output: defines the html helpers and returns self.
    def html_print
      # Writes the complete page: head, alphabet line, body and tail.
      def html_songsheet
        begin
          html_file_open
          html_head
          html_alph
          html_body
          html_tail
        ensure
          # close the files even when one of the steps above raises
          html_file_close
        end
      end

      # Page head; type 'maintenance' selects the stylesheet path used for
      # the copy written to the working directory.
      def html_head_adjust(type='')
        css_path=if type !~/maintenance/
          '../_sisu/css/harvest.css'
        else 'harvest.css'
        end
        sv=SiSU_Env::Info_version.instance.get_version
        <<WOK
SiSU Metadata Harvest - Topics

SiSU Metadata Harvest - Topics

[ HOME ] also see SiSU Metadata Harvest - Authors


WOK
      end

      def html_head
        @output[:html_mnt] << html_head_adjust('maintenance') if @opt.cmd.inspect =~/-M/
        @output[:html] << html_head_adjust
      end

      # Line listing the letters of the alphabet.
      def html_alph
        a=[]
        a << "\n\n"
        @alph.each do |x|
          a << if x =~/[0-9]/; ''
          else %{#{x}, }
          end
        end
        # write the joined string to both files; an Array appended to a file
        # is written in its inspect form
        line=a.join
        @output[:html_mnt] << line if @opt.cmd.inspect =~/-M/
        @output[:html] << line
      end

      def html_tail
        tail=<<WOK
#{@vz.credits_sisu}
WOK
        @output[:html_mnt] << tail if @output[:html_mnt]
        @output[:html] << tail
      end

      def do_html(html)
        @output[:html] << html
      end

      def do_html_maintenance(html)
        @output[:html_mnt] << html if @output[:html_mnt]
      end

      # Topic heading below the top level, written to both outputs.
      def do_string(attrib,string)
        html="\n\n#{string}\n\n"
        do_html(html)
        do_html_maintenance(html) if @output[:html_mnt]
      end

      def do_string_default(attrib,string)
        html="\n\n#{string}\n\n"
        do_html(html)
      end

      def do_string_maintenance(attrib,string)
        html="\n\n#{string}\n\n"
        do_html_maintenance(html) if @output[:html_mnt]
      end

      # Top-level topic heading. Before it, a heading is written for every
      # letter of the alphabet up to the topic's initial that has not been
      # written yet.
      def do_string_name(attrib,string)
        first_char=string[/\S/]
        while first_char and @letter < first_char and not @alph.empty?
          @letter=@alph.shift
          if @output[:html_mnt]
            @output[:html_mnt] << "\n\n\n#{@letter}\n\n"
          end
          @output[:html] << "\n\n\n#{@letter}\n\n"
        end
        name=string.strip.gsub(/\s+/,'_')
        html="\n\n#{string}\n\n"
        do_html(html)
        do_html_maintenance(html) if @output[:html_mnt]
      end

      def do_array(lv,array)
        lv+=1
        array.each do |b|
          do_case(lv,b)
        end
      end

      # Document line for the presentation output.
      def do_hash_md(attrib,hash)
        html=%{#{hash[:title]} - #{hash[:author]}}
        do_string_default(attrib,html)
      end

      # Document line for the maintenance output.
      def do_hash_md_maintenance(attrib,hash)
        if @output[:html_mnt] #should not be run for presentation output
          html=%{[src]  #{hash[:title]} - #{hash[:author]}}
          do_string_maintenance(attrib,html)
        end
      end

      # A hash is either a topic node (:md list plus sub-topic names) or a
      # document (META_KEYS). Sub-topics are written in sorted order after
      # the node's own documents.
      def do_hash(lv,hash)
        lv+=1
        key=[]
        hash.each_key do |m|
          if m == :md
            do_case(lv,hash[m])
          elsif not META_KEYS.include?(m)
            key << m
          elsif m == :title
            do_hash_md('work',hash)
            do_hash_md_maintenance('work',hash)
          end
        end
        key.sort.each do |m|
          attrib="lev#{lv}"
          if lv == 0
            do_string_name(attrib,m)
          else
            do_string(attrib,m)
          end
          do_case(lv,hash[m])
        end
      end

      # Dispatch on the type of tree element.
      def do_case(lv,a)
        case a
        when String
          attrib="lev#{lv}"
          if lv == 0
            do_string_name(attrib,a)
          else
            do_string(attrib,a)
          end
        when Array
          do_array(lv,a)
        when Hash
          do_hash(lv,a)
        end
      end

      # Walks the topic tree with top-level topics in sorted order.
      def html_body
        @the_idx.sort.each do |a|
          do_case(-1,a)
        end
      end
      self
    end

    # Selects screen output with sub-topics sorted: defines the helpers and
    # returns self; call cycle to print.
    def screen_print
      # Prints +string+ indented four spaces per level.
      def do_string(lv,string)
        s=' '*4
        puts s*lv + string
      end

      def do_array(lv,array)
        lv+=1
        array.each do |b|
          do_case(lv,b)
        end
      end

      def do_hash_md(lv,hash)
        string=hash[:title] + ' - ' + hash[:author]
        do_string(lv,string)
      end

      def do_hash(lv,hash)
        lv+=1
        key=[]
        hash.each_key do |m|
          if m == :md
            do_case(lv,hash[m])
          elsif not META_KEYS.include?(m)
            key << m
          elsif m == :title
            do_hash_md(lv,hash)
          end
        end
        key.sort.each do |m|
          do_string(lv,m)
          do_case(lv,hash[m])
        end
      end

      def do_case(lv,a)
        case a
        when String
          do_string(lv,a)
        when Array
          do_array(lv,a)
        when Hash
          do_hash(lv,a)
        end
      end

      def cycle
        @the_idx.each do |a|
          do_case(-1,a)
        end
      end
      self
    end

    # Selects screen output with sub-topics in hash order: defines the helpers
    # and returns self; call cycle to print.
    def screen_print_unsorted
      # Prints +string+ indented four spaces per level.
      def do_string(lv,string)
        s=' '*4
        puts s*lv + string
      end

      def do_array(lv,array)
        lv+=1
        array.each do |b|
          do_case(lv,b)
        end
      end

      def do_hash_md(lv,hash)
        string=hash[:title] + ' - ' + hash[:author]
        do_string(lv,string)
      end

      def do_hash(lv,hash)
        lv+=1
        hash.each_key do |m|
          if m == :md
            do_case(lv,hash[m])
          elsif not META_KEYS.include?(m)
            do_string(lv,m)
            do_case(lv,hash[m])
          elsif m == :title
            do_hash_md(lv,hash)
          end
        end
      end

      def do_case(lv,a)
        case a
        when String
          do_string(lv,a)
        when Array
          do_array(lv,a)
        when Hash
          do_hash(lv,a)
        end
      end

      def cycle
        @the_idx.each do |a|
          do_case(-1,a)
        end
      end
      self
    end
  end
end
__END__
terms -|_ t{tl1} -|_ {fa}[fa]{filenames and other details}
       |          |_ {tl2} -|_ {fa}[fa]{filenames and other details}
       |          |         |_{tl3} -|_ {fa}[fa]{filenames and other details}
       |          |         |        |_{tl4} - {fa}[fa]{filenames and other details}
       |          |         |        |
       |          |         |        |_{tl4a} - {fa}[fa]{filenames and other details}
       |          |         |        |
       |          |         |        |_{tl4b} - {fa}[fa]{filenames and other details}
       |          |         |        |
       |          |         |        |_ ...
       |          |         |
       |          |         |_{tl3a} - {fa}[fa]{filenames and other details}
       |          |
       |          |_{tl2a} - {fa}[fa]{filenames and other details}
       |
       |_ t{tl1a} -|_ {fa}[fa]{filenames and other details}
                   |_ ...