# encoding: utf-8
=begin

* Name: SiSU

** Description: documents, structuring, processing, publishing, search
*** metadata harvest, extract authors and their writings from document set

** Author: Ralph Amissah
  [ralph@amissah.com]
  [ralph.amissah@gmail.com]

** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
  2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Ralph Amissah,
  All Rights Reserved.

** License: GPL 3 or later:

  SiSU, a framework for document structuring, publishing and search

  Copyright (C) Ralph Amissah

  This program is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program. If not, see [http://www.gnu.org/licenses/].

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
  [http://www.fsf.org/licensing/licenses/gpl.html]
  [http://www.gnu.org/licenses/gpl.html]

** SiSU uses:
  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

** Hompages:
  [http://www.jus.uio.no/sisu]
  [http://www.sisudoc.org]

** Git
  [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary]
  [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/harvest_authors.rb;hb=HEAD]

=end
# Harvests title/creator/date header fields from a set of source documents,
# groups the documents by language and author, and writes one HTML author
# index per language.
#
# NOTE(review): this copy of the file appears to have passed through an
# extraction step that removed markup: the heredoc openers and the HTML inside
# the heredocs / %{...} literals are missing. Those spans are kept exactly as
# found and are marked below; restore them from the upstream file before use.
module SiSU_HarvestAuthors
  require_relative 'html_harvest_author_format'         # html_harvest_author_format.rb
  require_relative 'html_parts'                         # html_parts.rb
  # Entry point: reads the document headers, builds the author index and
  # triggers the HTML output.
  class Songsheet
    # Hash handed to Index.new as the accumulator for the author index.
    @@the_idx_authors={}
    # opt:: processing options object; opt.files and (later) opt.f_pths are read
    # env:: environment object, passed through to Harvest
    def initialize(opt,env)
      @opt,@env=opt,env
      @file_list=opt.files
    end
    # Runs the whole harvest: header extraction per file, index construction,
    # then HTML output via OutputIndex#html_print.html_songsheet.
    def songsheet
      idx_array={}
      @opt.f_pths.each do |y|
        lang_hash_file_array={}
        name=y[:f]
        filename=y[:pth] + '/' + y[:f]
        File.open(filename,'r') do |file|
          # Read the file in chunks separated by "\n\n".
          file.each_line("\n\n") do |line|
            if line =~/^@(?:title|creator|date):(?:\s|$)/m
              # Header chunk of interest: keep it, keyed by the file's language.
              lang_hash_file_array[y[:lng_is]] ||= []
              lang_hash_file_array[y[:lng_is]] << line
            elsif line =~/^@\S+?:(?:\s|$)/m \
            or line =~/^(?:\s*\n|%+ )/
              # Any other "@field:" chunk, or a chunk starting with a blank
              # line or a "%" prefix: skip it and keep reading.
            else break
              # First chunk that is none of the above: stop reading this file.
            end
          end
        end
        lang_hash_file_array.each_pair do |lang,a|
          idx_array[lang] ||= []
          idx_array=SiSU_HarvestAuthors::Harvest.new(
            @opt,
            @env,
            a,
            filename,
            name,
            idx_array,
            lang
          ).extract_harvest
        end
      end
      the_idx=SiSU_HarvestAuthors::Index.new(
        idx_array,
        @@the_idx_authors
      ).construct_book_author_index
      SiSU_HarvestAuthors::OutputIndex.new(
        @opt,
        the_idx
      ).html_print.html_songsheet
    end
  end
  # Extracts title, subtitle, author and year from the header chunks of one
  # document and appends a record for it to idx_array[lang].
  class Harvest
    # data::      array of header chunks collected by Songsheet#songsheet
    # filename::  path of the source file
    # name::      file name of the source file
    # idx_array:: hash of language => array of document records (extended here)
    # lang::      language key under which this document is recorded
    def initialize(opt,env,data,filename,name,idx_array,lang)
      @opt, @env,@data,@filename,@name,@idx_array,@lang=
        opt,env, data, filename, name, idx_array, lang
    end
    # Returns idx_array, with a record added for this document when both a
    # title and a creator were found.
    def extract_harvest
      data, filename, name, idx_array, lang =
        @data,@filename,@name,@idx_array,@lang
      @title=@subtitle=@fulltitle=@author=@author_format=@date=nil
      @authors=[]
      rgx={}
      rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m
      rgx[:title]=/^@title:[ ]+(.+)/
      rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m
      rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m
      data.each do |para|
        if para=~ rgx[:title]
          @title=rgx[:title].match(para)[1]
        end
        if para=~ rgx[:subtitle]
          @subtitle=rgx[:subtitle].match(para)[1]
        end
        if para=~ rgx[:author]
          @author_format=rgx[:author].match(para)[1]
        end
        if para=~ rgx[:date]
          @date=rgx[:date].match(para)[1]
        end
        # NOTE(review): @author is only ever set to nil in this method (the
        # creator match is stored in @author_format), so this early exit never
        # fires -- confirm whether @author_format was intended.
        break if @title && @subtitle && @author && @date
      end
      @fulltitle=@subtitle \
      ? (@title + ' - ' + @subtitle) : @title
      if @title \
      and @author_format
        creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details
        @authors,@authorship=creator[:authors],creator[:authorship]
        # Drop the ".sst"/".ssm" suffix, including a "~xx" or "~xxx" tag
        # directly before it when present.
        file=if name=~/~[a-z]{2,3}\.ss[mt]$/
          name.sub(/~[a-z]{2,3}\.ss[mt]$/,'')
        else
          name.sub(/\.ss[mt]$/,'')
        end
        # Manifest page name depends on the output directory layout.
        page=if @env.output_dir_structure.by? == :language
          "#{lang}/sisu_manifest.html"
        else
          "sisu_manifest.#{lang}.html"
        end
        idx_array[lang] <<= {
          filename: filename,
          file: file,
          date: @date,
          title: @fulltitle,
          author: creator,
          page: page,
          lang: lang
        }
      else
        #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}"
      end
      idx_array[lang]=idx_array[lang].flatten
      idx_array
    end
  end
  # Re-groups the per-language document records by author name.
  class Index
    # idx_array:: hash of language => array of document records
    # the_idx::   hash used as the accumulator; it is also stored in this
    #             class's @@the_idx_authors (same object, not a copy)
    def initialize(idx_array,the_idx)
      @idx_array,@the_idx=idx_array,the_idx
      @@the_idx_authors=@the_idx
    end
    # Returns txt with String#capitalize applied to its first character.
    # Not called anywhere in this file.
    def capital(txt)
      txt[0].chr.capitalize + txt[1,txt.length]
    end
    # Builds and returns a hash of the form
    #   { lang => { author_name => { md: [document records] } } }
    # using each name in idx[:author][:last_first_format_a] (stripped) as key,
    # so a document is listed once under each of those names.
    def construct_book_author_index
      idx_array=@idx_array
      idx_array.each_pair do |lang,idx_arr|
        @@the_idx_authors[lang] ||= {}
        idx_arr.each do |idx|
          idx[:author][:last_first_format_a].each do |author|
            author=author.strip
            if @@the_idx_authors[lang][author].is_a?(NilClass)
              @@the_idx_authors[lang][author]={ md: [] }
            end
            @@the_idx_authors[lang][author][:md] << {
              filename: idx[:filename],
              file: idx[:file],
              author: idx[:author],
              title: idx[:title],
              date: idx[:date],
              page: idx[:page],
              lang: idx[:lang]
            }
          end
        end
      end
      @the_idx=@@the_idx_authors
    end
  end
  # Writes the author index built by Index to one HTML file per language.
  class OutputIndex
    require_relative 'i18n'                             # i18n.rb
    # opt::     processing options object
    # the_idx:: hash as returned by Index#construct_book_author_index
    def initialize(opt,the_idx)
      @opt,@the_idx=opt,the_idx
      @env=SiSU_Env::InfoEnv.new
      @rc=SiSU_Env::GetInit.new.sisu_yaml.rc
      @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
      # @alph is the working copy consumed by do_string_name; @letter starts
      # at the first entry ('9').
      @alph=@alphabet_list.dup
      @letter=@alph.shift
    end
    # Opens (for writing) the authors file of every language in the index and
    # stores the handle in @output[lng][:html]. The target path depends on
    # @env.output_dir_structure.by?; the directory is created when missing.
    def html_file_open
      @the_idx.keys.each do |lng|
        @output ||={}
        @output[lng] ||={}
        harvest_pth,file='',''
        if @env.output_dir_structure.by? == :language
          harvest_pth=@env.path.webserv + '/' \
            + @opt.base_stub + '/' \
            + lng + '/' \
            + 'manifest'
          file="#{harvest_pth}/authors.html"
        elsif @env.output_dir_structure.by? == :filetype
          harvest_pth=@env.path.webserv + '/' \
            + @opt.base_stub + '/' \
            + 'manifest'
          file="#{harvest_pth}/authors.#{lng}.html"
        elsif @env.output_dir_structure.by? == :filename
          harvest_pth=@env.path.webserv + '/' \
            + @opt.base_stub
          file="#{harvest_pth}/authors.#{lng}.html"
        end
        FileUtils::mkdir_p(harvest_pth) \
          unless FileTest.directory?(harvest_pth)
        # The file URL is shown only for the verbose, verbose_plus,
        # urls_selected or maintenance actions.
        fileinfo=(@opt.act[:verbose][:set]==:on \
        || @opt.act[:verbose_plus][:set]==:on \
        || @opt.act[:urls_selected][:set]==:on \
        || @opt.act[:maintenance][:set]==:on) \
        ? ("file://#{file}") : ''
        SiSU_Screen::Ansi.new(
          @opt.act[:color_state][:set],
          "harvest authors (#{@opt.files.length} files)",
          fileinfo
        ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on
        @output[lng][:html]=File.new(file,'w')
      end
    end
    # Closes the handles opened by html_file_open (and :html_mnt when it is a
    # File).
    def html_file_close
      @the_idx.keys.each do |lng|
        @output[lng][:html].close
        @output[lng][:html_mnt].close \
          if @output[lng][:html_mnt].is_a?(File)
      end
    end
    # Calling this method defines the nested methods below (an inner def is
    # evaluated when the outer method runs) and returns self, which is how
    # Songsheet chains html_print.html_songsheet.
    def html_print
      # Produces the complete output: open files, head, alphabet line, body,
      # tail, close files.
      def html_songsheet
        html_file_open
        html_head
        html_alph
        html_body
        html_tail
        html_file_close
      end
      # Returns the page head for language lng; type 'maintenance' selects the
      # local 'harvest.css' path instead of the _sisu/css one.
      # NOTE(review): css_path, topics, file, sv and home_pth are not used in
      # the template text visible here -- presumably they were interpolated in
      # the markup that has been stripped; confirm against upstream.
      def html_head_adjust(lng,type='')
        css_path,topics='',''
        if @env.output_dir_structure.by? == :language
          css_path=(type !~/maintenance/) \
          ? '../../_sisu/css/harvest.css' : 'harvest.css'
          topics='topics.html'
        elsif @env.output_dir_structure.by? == :filetype
          css_path=(type !~/maintenance/) \
          ? '../_sisu/css/harvest.css' : 'harvest.css'
          topics="topics.#{lng}.html"
        elsif @env.output_dir_structure.by? == :filename
          css_path=(type !~/maintenance/) \
          ? './_sisu/css/harvest.css' : 'harvest.css'
          topics="topics.#{lng}.html"
        end
        ln=SiSU_i18n::Languages.new.language.list
        harvest_languages=''
        # One entry per language present in the index.
        @the_idx.keys.each do |lg|
          if @env.output_dir_structure.by? == :language
            harvest_pth="../../#{lg}/manifest"
            file="#{harvest_pth}/authors.html"
          elsif @env.output_dir_structure.by? == :filetype
            harvest_pth='.'
            file="#{harvest_pth}/authors.#{lg}.html"
          elsif @env.output_dir_structure.by? == :filename
            harvest_pth='.'
            file="#{harvest_pth}/authors.#{lg}.html"
          end
          l=ln[lg][:t]
          harvest_languages += %{#{l}   }
        end
        sv=SiSU_Env::InfoVersion.instance.get_version
        if @env.output_dir_structure.by? == :language
          home_pth='../..'
          output_structure_by=
            '(output organised by language & filetype)'
        elsif @env.output_dir_structure.by? == :filetype
          home_pth='..'
          output_structure_by=
            '(output organised by filetype)'
        elsif @env.output_dir_structure.by? == :filename
          home_pth='.'
          output_structure_by=
            '(output organised by filename)'
        else
          home_pth='.'
          output_structure_by='(output organised by ?)'
        end
        # NOTE(review): damaged span, kept verbatim. It ends with the
        # terminator WOK, so this was presumably a heredoc whose opener and
        # HTML markup have been lost; as written it is not valid Ruby.
        < SiSU Metadata Harvest - Authors

SiSU Metadata Harvest - Authors #{output_structure_by}

[ HOME ] also see SiSU Metadata Harvest - Topics

#{@env.widget_static.search_form}


#{harvest_languages}


WOK
      end
      # Writes the page head to every language's output.
      # NOTE(review): :html_mnt is written whenever the maintenance action is
      # on, but no code in this file assigns @output[lng][:html_mnt]; the
      # other writers guard with is_a?(File) instead -- confirm.
      def html_head
        @the_idx.keys.each do |lng|
          @output[lng][:html_mnt] \
            << html_head_adjust(lng,'maintenance') \
            if @opt.act[:maintenance][:set]==:on
          @output[lng][:html] \
            << html_head_adjust(lng)
        end
      end
      # Writes the alphabet line (entries matching a digit are left empty) to
      # every language's output.
      # NOTE(review): same maintenance-flag guard on :html_mnt as html_head;
      # the literals below have presumably lost their markup.
      def html_alph
        a=[]
        a << '

'
        @alph.each do |x|
          a << ((x =~/[0-9]/) \
          ? '' : %{#{x}, })
        end
        a=a.join
        @the_idx.keys.each do |lng|
          @output[lng][:html_mnt] << a \
            if @opt.act[:maintenance][:set]==:on
          @output[lng][:html] << a
        end
      end
      # Writes the page tail (including the SiSU credits) to every language's
      # output.
      def html_tail
        # NOTE(review): damaged span, kept verbatim -- presumably a heredoc
        # (terminator WOK) whose opener and markup have been lost.
        a =< #{SiSU_Proj_HTML::Bits.new.credits_sisu} WOK
        @the_idx.keys.each do |lng|
          @output[lng][:html_mnt] << a \
            if @output[lng][:html_mnt].is_a?(File)
          @output[lng][:html] << a
        end
      end
      # Appends html to the outputs of language lng.
      def do_html(lng,html)
        @output[lng][:html_mnt] << html \
          if @output[lng][:html_mnt].is_a?(File)
        @output[lng][:html] << html
      end
      # Emits letter headings up to the first non-space character of
      # string[0]. The alphabet is restarted whenever lng differs from the
      # language of the previous call; attrib is not used.
      def do_string_name(lng,attrib,string)
        f=/^(\S)/.match(string[0])[1]
        if @lng != lng
          @alph=@alphabet_list.dup
          @letter=@alph.shift
          @lng = lng
        end
        if @letter < f
          # Advance through the remaining letters, writing a heading for each,
          # until the current letter is no longer below f or none are left.
          while @letter < f
            if @alph.length > 0
              @letter=@alph.shift
              if @output[lng][:html_mnt].is_a?(File)
                @output[lng][:html_mnt] \
                  << %{\n

#{@letter}

}
              end
              @output[lng][:html] \
                << %{\n

#{@letter}

}
            else break
            end
          end
        end
      end
      # Writes, per language, every author (sorted) followed by that author's
      # works sorted by "date title".
      # NOTE(review): name and manifest_at are computed but not used in the
      # literals visible here -- presumably they were interpolated in markup
      # that has been stripped; confirm against upstream.
      def html_body
        the_idx=@the_idx
        the_idx.each_pair do |lng,lng_array|
          lng_array.sort.each do |a|
            # a is an [author_name, { md: [...] }] pair.
            do_string_name(lng,'',a)
            name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
            x = %{

#{a[0]}

}
            if @output[lng][:html_mnt].is_a?(File)
              @output[lng][:html_mnt] << x
            end
            @output[lng][:html] << x
            lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert
            works=[]
            a[1][:md].each do |i|
              manifest_at=if @env.output_dir_structure.by? == :language
                i[:file] + Sfx[:html]
              elsif @env.output_dir_structure.by? == :filetype
                i[:file] + lang_code_insert + Sfx[:html]
              elsif @env.output_dir_structure.by? == :filename
                './' + i[:file] + '/' + i[:page]
              else '' #error
              end
              # work[0] is the sort key, work[1] the entry for :html; a third
              # element (entry for :html_mnt) is added only when that output
              # is a File.
              work=[
                "#{i[:date]} #{i[:title]}",
                %{

#{i[:date]} #{i[:title]}, #{i[:author][:authors_s]}

}
              ]
              works<<=(@output[lng][:html_mnt].is_a?(File)) \
              ? (work.concat([%{

[src]  #{i[:date]} #{i[:title]}, #{i[:author][:authors_s]} -- [#{i[:file]}.sst]

}])) : work
            end
            works.sort_by {|y| y[0]}.each do |z|
              @output[lng][:html] << z[1]
              @output[lng][:html_mnt] << z[2] \
                if @output[lng][:html_mnt].is_a?(File)
            end
          end
        end
      end
      self
    end
    # Same nested-def pattern as html_print: defines cycle and returns self.
    def screen_print
      # Prints each top-level key of the index followed by the :file of every
      # record under it, tab-indented.
      # NOTE(review): this reads a[1][:md] directly, whereas the index built
      # above is keyed by language first -- confirm the expected input shape.
      def cycle
        the_idx=@the_idx
        the_idx.sort.each do |a|
          puts a[0]
          a[1][:md].each do |x|
            puts "\t" + x[:file]
          end
        end
      end
      self
    end
  end
end
__END__