about | summary | refs | log | tree | commit | diff | homepage
path: root/lib/sisu/v2/harvest_authors.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v2/harvest_authors.rb')
-rw-r--r-- lib/sisu/v2/harvest_authors.rb 316
1 files changed, 0 insertions, 316 deletions
diff --git a/lib/sisu/v2/harvest_authors.rb b/lib/sisu/v2/harvest_authors.rb
deleted file mode 100644
index dfb2b654..00000000
--- a/lib/sisu/v2/harvest_authors.rb
+++ /dev/null
@@ -1,316 +0,0 @@
-# coding: utf-8
-=begin
-
- * Name: SiSU
-
- * Description: a framework for document structuring, publishing and search
- metadata harvest, extract authors and their writings from document set
-
- * Author: Ralph Amissah
-
- * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.
-
- * License: GPL 3 or later:
-
- SiSU, a framework for document structuring, publishing and search
-
- Copyright (C) Ralph Amissah
-
- This program is free software: you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the Free
- Software Foundation, either version 3 of the License, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program. If not, see <http://www.gnu.org/licenses/>.
-
- If you have Internet connection, the latest version of the GPL should be
- available at these locations:
- <http://www.fsf.org/licensing/licenses/gpl.html>
- <http://www.gnu.org/licenses/gpl.html>
-
- <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
- <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
- <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
-
- * SiSU uses:
- * Standard SiSU markup syntax,
- * Standard SiSU meta-markup syntax, and the
- * Standard SiSU object citation numbering and system
-
- * Homepages:
- <http://www.jus.uio.no/sisu>
- <http://www.sisudoc.org>
-
- * Download:
- <http://www.jus.uio.no/sisu/SiSU/download.html>
-
- * Ralph Amissah
- <ralph@amissah.com>
- <ralph.amissah@gmail.com>
-
- ** Description: simple xml representation (sax style)
-
-=end
-module HARVEST_authors
- require "#{SiSU_lib}/author_format" # author_format.rb
- @@the_idx_authors=[]
- class Songsheet # Driver: filters the file list, reads header paragraphs, builds the author index and writes the HTML page.
- def initialize(opt) # opt: object answering #files (used here) and #cmd (used in #songsheet).
- @opt=opt
- @file_list=opt.files
- @env=SiSU_Env::Info_env.new
- end
- def songsheet # Runs the whole harvest over @file_list and prints the file:// location(s) of the result.
- files,idx_array=[],[]
- @file_list.each do |f| # keep names ending in .sst/.ssm; anything else is only reported via print
- (f =~/.+?\.ss[tm]$/) \
- ? (files << f[/(.+?\.ss[tm])$/,1]) \
- : (print "not .sst or .ssm ? << #{f} >> ")
- end
- files.each do |filename|
- file_array=[] # header paragraphs collected for this one file
- File.open(filename,'r') do |file|
- file.each_line("\n\n") do |line| # separator "\n\n": each "line" is a blank-line-delimited paragraph
- if line =~/^@(?:title|creator|date):(?:\s|$)/m # only @title:, @creator: and @date: paragraphs are kept
- file_array << line
- elsif line =~/^@\S+?:(?:\s|$)/m \
- or line =~/^(?:\s*\n|%+ )/
- else break # first paragraph that is neither an @header, blank-led, nor a "%" line stops the scan
- end
- end
- end
- idx_array=HARVEST_authors::Harvest.new(file_array,filename,idx_array).extract_harvest # accumulates one record per usable file
- end
- the_idx=HARVEST_authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index # NOTE(review): @@the_idx_authors is assigned in the module body; confirm it resolves from inside this class on the target Ruby
- HARVEST_authors::Output_index.new(@opt,the_idx).html_print.html_songsheet # html_print returns self after defining the html_* helpers
- puts "file://#{@env.path.output_md_harvest}/harvest_authors.html"
- puts "file://#{@env.path.pwd}/harvest_authors.html" if @opt.cmd.inspect =~/M/ # printed only when the inspected command contains "M"
- end
- end
- class Harvest # Pulls title, subtitle, author and year out of one file's header paragraphs and appends a record to the running array.
- def initialize(data,filename,idx_array) # data: header paragraphs; filename: source file name; idx_array: records gathered so far.
- @data,@filename,@idx_array=data,filename,idx_array
- end
- def extract_harvest # Returns idx_array (flattened); a record is appended only when both a title and an author were found.
- data,filename,idx_array=@data,@filename,@idx_array
- @title,@subtitle,@fulltitle,@author,@author_format,@date=nil,nil,nil,nil,nil,nil
- @authors=[]
- rgx={}
- rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m # text after "@creator:" (or its :author: sub-field) up to the first "|" or newline
- rgx[:title]=/^@title:[ ]+(.+)/ # rest of the "@title:" line
- rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m # :subtitle: sub-field inside the @title: paragraph
- rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m # four-digit year after "@date:" or its :published: sub-field
- data.each do |para|
- if para=~ rgx[:title]
- @title=rgx[:title].match(para)[1]
- end
- if para=~ rgx[:subtitle]
- @subtitle=rgx[:subtitle].match(para)[1]
- end
- if para=~ rgx[:author]
- @author_format=rgx[:author].match(para)[1]
- end
- if para=~ rgx[:date]
- @date=rgx[:date].match(para)[1]
- end
- break if @title and @subtitle and @author and @date # NOTE(review): @author is never assigned in this method (only @author_format is), so this break cannot fire
- end
- @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title # "title - subtitle" when a subtitle exists, otherwise the bare title
- if @title and @author_format
- creator=FORMAT::Author.new(@author_format.strip).author_details # hash; keys read in this file: :authors, :authorship, :last_first_format_a, :authors_s
- @authors,@authorship=creator[:authors],creator[:authorship]
- file=if filename=~/~[a-z]{2,3}\.ss[mt]$/ # name carries a "~xx" / "~xxx" suffix before the extension
- lang='.' + /~([a-z]{2,3})\.ss[mt]$/.match(filename)[1]
- filename.sub(/~[a-z]{2,3}\.ss[mt]$/,'')
- else
- lang=''
- filename.sub(/\.ss[mt]$/,'')
- end
- page="sisu_manifest#{lang}.html"
- idx_array <<= { :filename => filename, :file => file, :date => @date, :title => @fulltitle, :author => creator, :page => page }
- else
- #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}"
- end
- idx_array.flatten!
- idx_array
- end
- end
- class Index # Regroups the harvested records under each author name.
- def initialize(idx_array,the_idx) # idx_array: records built by Harvest; the_idx: container that receives the per-author entries.
- @idx_array,@the_idx=idx_array,the_idx
- @@the_idx_authors=@the_idx
- end
- def capital(txt) # Returns txt with its first character capitalized; no caller appears in this file.
- txt[0].chr.capitalize + txt[1,txt.length]
- end
- def construct_book_author_index # Returns the index: author name => {:md => [record, ...]}.
- idx_array=@idx_array
- idx_array.each do |idx|
- idx[:author][:last_first_format_a].each do |author| # one entry per name in the :last_first_format_a list
- author.strip! # in place, so the name stored in idx[:author] is trimmed as well
- if @@the_idx_authors[author].class==NilClass # NOTE(review): lookup is by author string, yet the module body initialises @@the_idx_authors to [] (an Array); verify the container type
- @@the_idx_authors[author]={:md => []}
- end
- @@the_idx_authors[author][:md] << { :filename => idx[:filename], :file => idx[:file], :author => idx[:author], :title => idx[:title], :date => idx[:date], :page => idx[:page] }
- end
- end
- @the_idx=@@the_idx_authors # last expression, so this is the method's return value
- end
- end
- class Output_index # Writes the author index as harvest_authors.html, plus an optional second copy when the command contains "M".
- def initialize(opt,the_idx) # opt: object answering #cmd; the_idx: author name => {:md => [...]} as built by Index.
- @opt,@the_idx=opt,the_idx
- @env=SiSU_Env::Info_env.new
- @rc=Get_init.instance.yamlrc # NOTE(review): @rc is not read again in this class, and Get_init is unqualified here (SiSU_Env::Get_init below)
- @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
- @letter=@alph.shift # @letter starts as "9"; @alph is left holding A..Z
- @vz=SiSU_Env::Get_init.instance.skin
- end
- def html_file_open # Opens the output file(s) for writing; @output[:html_mnt] is nil unless the inspected command contains "M".
- @output={}
- @output[:html]=File.new("#{@env.path.output_md_harvest}/harvest_authors.html",'w')
- @output[:html_mnt]=(@opt.cmd.inspect =~/M/) \
- ? File.new("#{@env.path.pwd}/harvest_authors.html",'w') \
- : nil
- end
- def html_file_close # Closes whichever output files were opened.
- @output[:html].close
- @output[:html_mnt].close if @output[:html_mnt].class==File
- end
- def html_print # Calling this defines the html_* helper methods below and returns self, enabling html_print.html_songsheet.
- def html_songsheet # Full page in order: open, head, A-Z links, body, tail, close.
- html_file_open
- html_head
- html_alph
- html_body
- html_tail
- html_file_close
- end
- def html_head_adjust(type='') # Returns the page head as a string; type matching /maintenance/ selects the local 'harvest.css' path.
- css_path=(type !~/maintenance/) \
- ? '../_sisu/css/harvest.css' \
- : 'harvest.css'
- sv=SiSU_Env::Info_version.instance.get_version # keys read below: :project, :version, :date_stamp
- <<WOK
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
-"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-<title>SiSU Metadata Harvest - Authors</title>
-<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
-<meta name="dc.title" content= "SiSU metadata harvest, Authors - SiSU information Structuring Universe, Structured information Serialised Units" />
-<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" />
-<meta name="generator" content="#{sv[:project]} #{sv[:version]} of #{sv[:date_stamp]} (n*x and Ruby!)" />
-<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" />
-<link rel="stylesheet" href="#{css_path}" type="text/css" />
-<link rel="shortcut icon" href="../_sisu/image/rb7.ico" />
-</head>
-<body bgcolor="#ffffff" text="#000000" link="#003090" lang="en" xml:lang="en">
-<a name="top" id="top"></a>
-<a name="up" id="up"></a>
-<a name="start" id="start"></a>
-<h1>SiSU Metadata Harvest - Authors</h1>
-<p>[<a href="../index.html">&nbsp;HOME&nbsp;</a>] also see <a href="harvest_topics.html">SiSU Metadata Harvest - Topics</a></p>
-<p>#{@env.widget_static.search_form}</p>
-<hr />
-WOK
- end
- def html_head # Writes the head to each open output, using the 'maintenance' variant for the second copy.
- @output[:html_mnt] << html_head_adjust('maintenance') if @opt.cmd.inspect =~/M/
- @output[:html] << html_head_adjust
- end
- def html_alph # Writes the row of letter links; at this point @alph holds A..Z.
- a=[]
- a << '<p>'
- @alph.each do |x| # entries matching a digit contribute an empty string instead of a link
- a << ((x =~/[0-9]/) ? '' : %{<a href="##{x}">#{x}</a>,&nbsp;})
- end
- @output[:html_mnt] << a.join if @output[:html_mnt].class==File
- @output[:html] << a.join
- end
- def html_tail # Writes the closing anchors, the credits from @vz and the closing tags.
- a=[]
- a <<<<WOK
-<hr />
-<a name="bottom" id="bottom"></a>
-<a name="down" id="down"></a>
-<a name="end" id="end"></a>
-<a name="finish" id="finish"></a>
-<a name="stop" id="stop"></a>
-<a name="credits"></a>
-#{@vz.credits_sisu}
-</body>
-</html>
-WOK
- @output[:html_mnt] << a if @output[:html_mnt].class==File
- @output[:html] << a # NOTE(review): a is an Array, so << writes Array#to_s; on Ruby 1.9+ that is the inspect form rather than the joined text — confirm intended
- end
- def do_html(html) # Appends html to every open output.
- @output[:html_mnt] << html if @output[:html_mnt].class==File
- @output[:html] << html
- end
- def do_string(attrib,string) # Wraps string in <p class="attrib"> and writes it via do_html.
- html=%{<p class="#{attrib}">#{string}</p>}
- do_html(html)
- end
- def do_string_name(attrib,string) # Emits letter headings until @letter reaches the first character of string[0]; attrib is unused.
- f=/^(\S)/.match(string[0])[1] # leading non-space character of string[0] (html_body passes an [author, data] pair)
- if @letter < f
- while @letter < f
- if @alph.length > 0
- @letter=@alph.shift
- if @output[:html_mnt].class==File
- @output[:html_mnt] << %{\n<p class="letter"><a name="#{@letter}"></p>#{@letter}</a><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>} # NOTE(review): </p> closes before </a> here, unlike the @output[:html] line below
- end
- @output[:html] << %{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>}
- else break # no letters left to emit
- end
- end
- end
- end
- def html_body # Writes each author, in sorted order, followed by that author's works sorted by "date title".
- the_idx=@the_idx
- the_idx.sort.each do |a| # a is an [author_name, {:md => [...]}] pair
- do_string_name('',a)
- name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') # anchor name: text before the first comma, whitespace runs turned into "_"
- x = %{<p class="author"><a name="#{name}">#{a[0]}</a></p>}
- if @output[:html_mnt].class==File
- @output[:html_mnt] << x
- end
- @output[:html] << x
- works=[]
- a[1][:md].each do |x| # each work becomes [sort key, html] plus a third, maintenance-page element when that file is open
- work=[ "#{x[:date]} #{x[:title]}", %{<p class="publication">#{x[:date]} <a href="../#{x[:file]}/#{x[:page]}">#{x[:title]}</a>, #{x[:author][:authors_s]}</p>} ]
- works<<=(@output[:html_mnt].class==File) \
- ? (work.concat([%{<p class="publication">[<a href="#{x[:file]}.sst">src</a>]&nbsp;&nbsp;#{x[:date]} <a href="file://#{@env.path.output}/#{x[:file]}/#{x[:page]}">#{x[:title]}</a>, #{x[:author][:authors_s]} -- [<a href="#{x[:file]}.sst">#{x[:file]}.sst</a>]</p>}])) \
- : work
- end
- works.sort_by {|x| x[0]}.each do |x|
- @output[:html] << x[1]
- @output[:html_mnt] << x[2] if @output[:html_mnt].class==File
- end
- end
- end
- self
- end
- def screen_print # Calling this defines #cycle and returns self.
- def cycle # Prints each author name, then one tab-indented :file value per work.
- the_idx=@the_idx
- the_idx.sort.each do |a|
- puts a[0]
- a[1][:md].each do |x|
- puts "\t" + x[:file]
- end
- end
- end
- self
- end
- end
-end
-__END__