Statistics
| Revision:

root / lib / bibtex.rb @ 16

History | View | Annotate | Download (17.3 KB)

1
require 'fileutils'
2
require 'tempfile'
3
require 'rubygems'
4
require 'open4' # gem
5
require 'logger'
6
require 'erb'
7
8
# parse bibtex files and render data
9
module BibTeX
10
11
  # --- TODO: put the following in a module
12
13
  # Base directory for log and temporary files: the Rails application root
  # when RAILS_ROOT is defined, '/tmp' otherwise.
  ROOT =
    if defined?(RAILS_ROOT)
      RAILS_ROOT
    else
      '/tmp'
    end

  # Best effort: create the log/ and tmp/ subdirectories. Any failure
  # (typically "directory already exists") is deliberately ignored.
  ['/log', '/tmp'].each do |subdir|
    begin
      FileUtils::mkdir(File.join(ROOT, subdir))
    rescue StandardError
      nil
    end
  end
21
  
22
  # Locate an executable using the shell's +which+ command.
  #
  # program:: name of the executable to look up
  #
  # Returns the path printed by +which+ (whitespace stripped) and logs it.
  # Raises a RuntimeError when +which+ prints nothing.
  def BibTeX.which(program)
    location = `which #{program}`.strip
    if location.nil? || location.length == 0
      raise "ERROR: '#{program}' not available"
    end
    @@log.info "found '#{program}' at '#{location}'"
    location
  end
29
30
  # Execute a shell command via Open4 and collect its exit status and output.
  #
  # command:: the command line handed to Open4::popen4
  # desc::    optional human-readable description, passed on as info[:desc]
  #
  # With a block: yields a Hash with the keys :desc, :command, :status,
  # :stderr and :stdout and returns the block's value; the block decides
  # what counts as a failure.
  #
  # Without a block: returns the captured stdout and raises a RuntimeError
  # (including the captured stderr) when the exit status is 1.
  def BibTeX.execute(command,desc=nil)
    @@log.info "executing #{command}"
    pid, stdin, stdout, stderr = Open4::popen4(command)

    # Drain both pipes *before* waiting for the child: a child that writes
    # more than the pipe buffer holds would otherwise block forever while we
    # block in waitpid2. Two readers avoid the same stall between the pipes.
    stdout_reader = Thread.new { stdout.read }
    stderr_reader = Thread.new { stderr.read }
    output = stdout_reader.value
    err = stderr_reader.value

    _pid, status = Process::waitpid2(pid)
    [stdin,stdout,stderr].each { |pipe| pipe.close }

    @@log.info "status=#{status}"
    @@log.info "err=#{err}"
    @@log.info "output=#{output}"

    info={
      :desc => desc, :command => command, :status => status,
      :stderr => err, :stdout => output
    }

    return yield(info) if block_given?

    # NOTE(review): only exit status 1 is treated as an error here; other
    # non-zero codes pass silently, as in the original -- confirm intent.
    if status.exitstatus==1
      raise "#{desc || command} failed (exit status 1): #{err}"
    end
    output
  end
55
  
56
  # Accessor for the module-wide Logger instance (@@log).
  def BibTeX.log() @@log end
60
61
  # Accessor for the directory used for temporary files (@@tmpdir).
  def BibTeX.tmpdir() @@tmpdir end
65
66
  # ---
67
68
  # Module-wide state: temporary directory and log file, both below ROOT.
  @@tmpdir = File.join(ROOT, '/tmp')
  @@log = Logger.new(File.join(ROOT, '/log/bibtex.log'))
70
71
  # Ordered list of [pattern, replacement] pairs used to turn LaTeX markup
  # into HTML. The rules are applied one after another with gsub, so the
  # braced form of an accent must come before its unbraced form, and the
  # generic brace removal must stay last.
  RULES_LATEX2HTML =
    [
     # a single braced vowel: {a} -> a
     [ /\{([aeiouAEIOU])\}/, '\1' ],

     # umlauts: {\"a} / \"a / "a -> &auml;
     [ /\{\\\"([aouAOU])\}/ , '&\1uml;' ],
     [ /\\?\"([aouAOU])/ , '&\1uml;' ],

     # sharp s: {\"s} / {\3} / "s / \3 / \"s
     [ /\{((\\\"s)|(\\3))\}/ , 'ß' ],
     [ /(\"s|\\3|\\\"s)/, 'ß' ],

     # acute accents: {\'e} / \'e -> &eacute;
     [ /\{\\\'([aeiouAEIOU])\}/, '&\1acute;' ],
     [ /\\?\'([aeiouAEIOU])/ , '&\1acute;' ],  # was '&\1acutel;' (invalid entity)

     # circumflex: {\^o} / \^o -> &ocirc;
     [ /\{\\\^([aeiouAEIOU])\}/, '&\1circ;' ],
     [ /\\?\^([aeiouAEIOU])/ , '&\1circ;' ],

     # emphasis: {... \em text} -> ...<em>text</em>
     [ /\{(.*)\\em\s+(.*)\}/mx, '\1<em>\2</em>' ],

     # drop remaining unescaped braces, turn ties into spaces
     [ /([^\\])\{/, '\1'], [ /([^\\])\}/,'\1'], [/~/,' ']
  ]
91
92
  # Convert LaTeX markup to HTML by applying every rule of RULES_LATEX2HTML
  # in order. Works on a copy; +text+ itself is left untouched.
  def BibTeX.latex2html(text)
    RULES_LATEX2HTML.inject(text.dup) do |html, rule|
      html.gsub(rule[0], rule[1])
    end
  end
100
101
  # Ordered list of [pattern, replacement] pairs used to turn LaTeX markup
  # into plain ASCII text. Same structure as the HTML rule table, but the
  # replacements must not contain entity syntax: umlauts become "ae"/"oe"/
  # "ue", sharp s becomes "ss", other accents are simply dropped.
  RULES_LATEX2TXT =
    [
     # a single braced vowel: {a} -> a
     [ /\{([aeiouAEIOU])\}/, '\1' ],

     # umlauts: {\"a} / \"a / "a -> ae   (was '&\1e', a leftover '&')
     [ /\{\\\"([aouAOU])\}/ , '\1e' ],
     [ /\\?\"([aouAOU])/ , '\1e' ],

     # sharp s -> ss   (the braced form used to emit 'ss;')
     [ /\{((\\\"s)|(\\3))\}/ , 'ss' ],
     [ /(\"s|\\3|\\\"s)/, 'ss' ],

     # acute accents are dropped
     [ /\{\\\'([aeiouAEIOU])\}/, '\1' ],
     [ /\\?\'([aeiouAEIOU])/ , '\1' ],

     # circumflex accents are dropped   (the braced form used to emit '\1;')
     [ /\{\\\^([aeiouAEIOU])\}/, '\1' ],
     [ /\\?\^([aeiouAEIOU])/ , '\1' ],

     # emphasis markup is removed, the text is kept
     [ /\{(.*)\\em\s+(.*)\}/mx, '\1\2' ],

     # drop remaining unescaped braces, turn ties into spaces
     [ /([^\\])\{/, '\1'], [ /([^\\])\}/,'\1'],[/~/,' ']
  ]
121
122
  # Convert LaTeX markup to ASCII text (no special characters) by applying
  # every rule of RULES_LATEX2TXT in order. Works on a copy of +text+.
  def BibTeX.latex2txt(text)
    RULES_LATEX2TXT.inject(text.dup) do |plain, rule|
      plain.gsub(rule[0], rule[1])
    end
  end
130
131
132
  # Lexical scanner for BibTeX files (pretty general and reusable).
  #
  # Call next_token repeatedly; it returns the token symbol of the next
  # lexeme (:type, :id, :data, :lbrace, :rbrace, :quote, :comma, :equal) or
  # :eoi at the end of the input. After a successful call, +offset+ and
  # +length+ locate the lexeme in +text+ and token_text returns it.
  # Whitespace and "%..." comments between tokens are skipped.
  class BibLex
    attr_reader :text
    attr_reader :offset, :length, :token

    # A rule in BibLex: a regexp (anchored at the start of the remaining
    # input) and the token it produces.
    class Rule
      # rule regexp -> token
      def initialize(regexp,token)
        @regexp=regexp
        @token=token
      end

      # match against input, returns nil or [ token, matched text ]
      def match(input)
        match_data=@regexp.match(input)
        match_data ? [ @token, match_data[0] ] : nil
      end
    end

    # Set up a scanner for +text+. Rules are tried in definition order, so
    # escaped characters must be defined before the delimiters they escape.
    def initialize(text)
      @text=text
      @len=text.length
      @head=0
      @rules=[]

      define_rule(/\A(@[A-Za-z]+)/, :type)

      # escaped characters are plain data
      define_rule(/\A(\\\\)/, :data)
      define_rule(/\A(\\\")/, :data)
      define_rule(/\A(\\\{)/, :data)
      define_rule(/\A(\\\})/, :data)
      define_rule(/\A(\\,)/, :data)

      define_rule(/\A(\{)/, :lbrace)
      define_rule(/\A(\})/, :rbrace)
      define_rule(/\A(\")/, :quote)
      define_rule(/\A(,)/, :comma)

      define_rule(/\A(=)/, :equal)
      define_rule(/\A([A-Za-z][A-Za-z0-9_$:]+)/, :id)
      define_rule(/\A([^{}\",= ]+)/, :data)
    end

    # Get the next token. Returns :eoi when the input is exhausted; in that
    # case offset, length and token are nil.
    def next_token
      @offset=nil
      @length=nil
      @token=nil

      eat

      return :eoi if eoi?

      rest=input
      @rules.each do |rule|
        found=rule.match(rest)
        next unless found

        @token=found[0]
        @offset=@head
        @length=found[1].length
        @head+=@length
        break
      end

      @token
    end

    # Get the text of the last token, or nil when there is none (before the
    # first call to next_token or after :eoi).
    def token_text
      return nil if @offset.nil?
      @text[@offset,@length]
    end

    # remaining input
    def input
      @text[@head,@len]
    end

    # end of input reached?
    def eoi?
      rest=input
      rest.nil? || rest.length==0
    end

    private

    # define a BibLex::Rule
    def define_rule(regexp,token)
      @rules << Rule.new(regexp,token)
    end

    # Advance @head over the text matched by +pattern+ at the start of the
    # remaining input; returns true if anything was consumed.
    def skip(pattern)
      matched=(input || '')[pattern]
      return false if matched.nil? || matched.length==0
      @head+=matched.length
      true
    end

    # consume whitespace (the whole run at once), return true/false
    def eat_whitespace
      skip(/\A\s+/)
    end

    # consume BibTeX comments "%...\n", return true/false;
    # a comment on the very last line needs no trailing newline
    def eat_comment
      skip(/\A(?:%[^\n]*(?:\n|\z))+/)
    end

    # alternate eat_whitespace and eat_comment until neither consumes input
    def eat
      while (eat_whitespace || eat_comment) do end
    end

  end # BibLex
257
258
  # Simple recursive-descent BibTeX parser relying on BibLex.
  #
  # Grammar handled:
  #   collection := record*
  #   record     := TYPE '{' id ',' datafields '}'
  #   datafields := ( field ( ',' field )* ','? )?
  #   field      := ID '=' ( '"' ... '"' | '{' ... '}' )
  #
  # Every parsed record is handed to the output object via +add+ as a
  # BibTeXData::Entry carrying the extra fields '$from_source',
  # '$source_offset', '$source_length' and '$source'.
  class BibParse

    # BibLex object defines input, bibtexdata (anything responding to +add+)
    # receives the output; from_source is stored with every entry.
    def initialize(biblex,bibtexdata,from_source=nil)
      @lex=biblex # communication exclusively via @lookahead and self#next_token
      @output=bibtexdata
      @entry=nil
      @from_source=from_source || 'source unknown'
      next_token
    end

    # parse the whole input; raises a RuntimeError on syntax errors
    def parse
      collection
    end

    protected

    # read next token through @lookahead (do not use @lex.next_token directly!)
    def next_token
      @lookahead=@lex.next_token
    end

    # text of the lookahead token for error messages; never asks the lexer
    # for text at end of input, where there is none
    def lookahead_text
      @lookahead==:eoi ? '<end of input>' : @lex.token_text
    end

    # return text or raise error
    def expect(token)
      raise "expected '#{token.to_s}' got '#{lookahead_text}'" if @lookahead!=token
      text=@lex.token_text
      next_token
      text
    end

    # return [token,text] or raise error
    def expect_one_of(*token)
      unless token.include?(@lookahead)
        wanted=token.map { |s| s.to_s }.join(' ')
        raise "expected one of '#{wanted}' got '#{lookahead_text}'"
      end
      rval=[@lookahead,@lex.token_text]
      next_token
      rval
    end

    # read collection
    def collection
      while record do end
    end

    # read record in collection; returns nil at end of input, true otherwise
    def record
      return nil if @lookahead==:eoi

      offset=@lex.offset

      type=expect(:type).downcase

      expect :lbrace
      # citation keys are not restricted to identifier syntax, so accept a
      # :data token as well
      id=expect_one_of(:id,:data)[1]

      @entry=BibTeXData::Entry.new(id,type)
      @entry.add_field('$from_source',@from_source)
      @entry.add_field('$source_offset',offset)

      expect :comma

      datafields

      expect :rbrace

      # the lookahead is now the first token after the record (or :eoi, in
      # which case the lexer reports no offset)
      ofs=@lex.offset
      ofs=@lex.text.length if ofs.nil?
      length=ofs-offset
      @entry.add_field('$source_length',length)
      @entry.add_field('$source',@lex.text[offset,length])

      @output.add @entry.dup
      @entry=nil

      true
    end

    # read datafields in record; a trailing comma after the last field is
    # optional, and the field list may be empty
    def datafields
      until @lookahead==:rbrace || @lookahead==:eoi
        key,data=field
        @entry.add_field(key,data)
        break unless @lookahead==:comma
        next_token
      end
    end

    # read field in datafields, return [key,data]
    def field
      key=expect :id
      key.downcase!

      expect :equal

      data=''

      delim=expect_one_of(:quote,:lbrace)

      if delim[0]==:quote
        # collect tokens up to the closing quote; quotes inside a brace
        # group do not terminate the value
        depth=0
        until @lookahead==:quote && depth==0
          raise "missing '\"' near '#{key}=#{data[0,20]}...'" if @lookahead==:eoi
          depth+=1 if @lookahead==:lbrace
          depth-=1 if @lookahead==:rbrace && depth>0
          data << @lex.token_text << ' '
          next_token
        end
        next_token # consume the closing quote
      else
        # collect tokens up to the matching closing brace
        n=1
        while n!=0 && @lookahead!=:eoi
          case @lookahead
          when :lbrace
            n+=1
            data << '{'
          when :rbrace
            n-=1
            data << '}' if n>0
          else
            data << @lex.token_text << ' '
          end
          next_token
        end
        raise "missing '}' near '#{key}=#{data[0,20]}...'" if n>0
      end

      # tokens were joined with single blanks; tidy up the spacing again
      data=data.strip.
        gsub(/ ,/,',').gsub(/ \./,'.').gsub(/\n/,' ').gsub(/  /,' ').
        gsub(/ \{/,'{').gsub(/ \}/,'}').gsub(/\" /,'"').gsub(/ -/,'-').gsub(/ '/,'\'')

      [key,data]
    end
  end # BibParse
391
392
  
393
  # BibTeX database: a Hash mapping entry ids to BibTeXData::Entry objects.
  #
  # Entries are added by scanning BibTeX source (scan) or directly (add).
  # make_bbl runs latex and bibtex on the collection and stores the
  # formatted reference of every entry in its '$bbl' field.
  class BibTeXData < Hash

    # Paths of the external tools. They are looked up on first use instead
    # of at class load time, so that parsing and querying also work on
    # machines without a TeX installation.
    @@bibtex=nil
    @@latex=nil

    # path to the bibtex executable (raises if it is not available)
    def BibTeXData.bibtex_executable
      @@bibtex ||= BibTeX.which('bibtex')
    end

    # path to the latex executable (raises if it is not available)
    def BibTeXData.latex_executable
      @@latex ||= BibTeX.which('latex')
    end

    # BibTeX entry in collection: a Hash of field name -> value. Keys
    # starting with '$' ('$id', '$type', ...) are internal and never written
    # back to BibTeX syntax.
    class Entry < Hash
      # create new entry; fields is an optional Hash of initial fields
      def initialize(id,type,fields=nil)
        self['$id']=id
        self['$type']=type
        fields.each { |key,value| self[key]=value } if fields
      end

      # add a field
      def add_field(key,data)
        self[key]=data
      end

      # order used by to_bib (if :predicate given)
      ORDER = [ 'author','title','editor','booktitle',
                'series','volume','number',
                'publisher',
                'address','month','year','pages',
                'organization','type','isbn','paddress','note','abstract' ].freeze

      # predicate to to_bib (using ORDER); keys not listed in ORDER sort
      # before listed ones, alphabetically among themselves
      def sort_predicate(a,b)
        ia=ORDER.index(a)
        ib=ORDER.index(b)
        if ia.nil?
          if ib.nil? then a <=> b else -1 end
        elsif ib.nil?
          +1
        else
          ia <=> ib
        end
      end

      private :sort_predicate

      # NOTE: This sorting on the fly is inefficient.

      # restore BibTeX syntax from fields
      # options is a Hash:
      # - :suppress => [] # array of keys to suppress
      # - :sort_predicate => predicate
      # - :sort use default sort_predicate if +:sort_predicate=>nil+ (ignored otherwise)
      def to_bib(options={ :sort_predicate => lambda {|a,b| sort_predicate(a,b)} })
        suppress=options[:suppress] || []
        bib="#{self['$type']}{#{self['$id']},\n"

        bibtex=self.to_a
        pred=options[:sort_predicate]
        pred=(lambda {|a,b| sort_predicate(a,b)}) if !pred && options.has_key?(:sort)

        if pred
          bibtex=bibtex.sort { |a,b| pred.call(a[0],b[0]) }
        end
        bibtex.each do |key,value|
          bib << " #{key}={#{value}},\n" if (!(key=~/\A\$/) && !suppress.include?(key))
        end

        bib << "}\n"
      end

      # get author names ("firstname lastname") as array;
      # empty for entries without an author field (e.g. proceedings)
      def authors
        names=self['author']
        return [] if names.nil?
        names.to_s.split(/\s+and\s+/).map do |author|
          author.split(',').reverse.join(' ').strip
        end
      end

    end # Entry


    # create a collection, optionally scanning BibTeX source +input+
    def initialize(input=nil)
      @timestamp=0
      @bbl_time=nil
      @authors=nil
      @authors_time=nil
      scan(input) if input
    end

    # make_bbl called successfully and no scan or add since
    def have_bbl?
      @bbl_time==@timestamp
    end

    # ensure bbl entries are valid
    def ensure_bbl
      make_bbl if !have_bbl?
    end

    # add data (sets '$from_source' tag if given)
    def scan(text,from_source=nil)
      @timestamp+=1
      lexer=BibLex.new(text)
      parser=BibParse.new(lexer,self,from_source)
      parser.parse
    end

    # add an entry (called by BibParse)
    def add(entry)
      @timestamp+=1
      self[entry['$id']]=entry
    end

    # restore BibTeX file from fields; returns filename
    def make_bibfile(filename)
      File.open(filename,'w+') do |f|
        self.each_value do |e|
          f.puts e.to_bib({:suppress=>[],:sort_predicate=>nil})
        end
      end
      filename
    end

    # Get bbl as set by bibtex: writes the collection and a minimal LaTeX
    # driver document to the temporary directory, runs latex and bibtex on
    # them and stores each formatted reference in the entry's '$bbl' field.
    # Raises a RuntimeError (including the tool's output) if a tool fails.
    def make_bbl(style='plain')
      @bbl_time=nil
      #`rm /tmp/bibtexdata.*`
      FileUtils::rm Dir.glob(File.join(BibTeX.tmpdir,'bibtexdata.*'))
      froot=File.join(BibTeX.tmpdir,'bibtexdata')
      make_bibfile(froot+'.bib')
      File.open(froot+'.tex','w+') do |f|
        f.puts <<END
\\documentclass{article}
\\begin{document}
\\nocite{*}
\\bibliographystyle{#{style}}
\\bibliography{bibtexdata}
\\end{document}
END
      end

      run_tex_tool(BibTeXData.latex_executable,'-halt-on-error bibtexdata','latex')
      run_tex_tool(BibTeXData.bibtex_executable,'bibtexdata','bibtex')

      bbl=IO.read(froot+'.bbl')
      bbl.scan(/^\\bibitem\{(.+)\}\n((?:(?:.+)\n)+)/) do |id,text|
        entry=self[id]
        # ignore items bibtex reports under a key we do not know
        entry['$bbl']="\\bibitem{#{id}}\n#{text}" if entry
      end
      FileUtils::rm Dir.glob(File.join(BibTeX.tmpdir,'bibtexdata.*'))

      @bbl_time=@timestamp
    end

    # run a TeX tool inside the temporary directory; +desc+ names the tool
    # in the error message raised on a non-zero exit status
    def run_tex_tool(executable,arguments,desc)
      command='cd %s ; %s %s' % [BibTeX.tmpdir,executable,arguments]
      BibTeX.execute(command,desc) do |info|
        if info[:status].exitstatus!=0
          raise "%s failed: %s" % [info[:desc],info[:stdout]]
        end
      end
    end

    private :run_tex_tool


    @@predicates_disabled=nil

    # disables use of predicates in query
    def BibTeXData.disable_predicates
      @@predicates_disabled=true
    end

    # Query entries.
    # :call-seq:
    # query(options) -> Array of entries matching options
    # query(options) do |entry| ... end -> yield entries to block
    #
    # - +options+ is a Hash of fields and queried values, values are
    #   either a regular expression, a string (used as a regular
    #   expression pattern) or an object responding to +include?+
    #   (Array, Range, ...; for a Range the field is converted with to_i).
    # - If given, +options[:predicate]+ is evaluated, unless
    #   disable_predicates has been called (then an error is raised).
    # - If +options[:require]+ is given (+include?+) the respective
    #   fields are required,
    #   the default is undefined fields pass all tests.
    # - Returns an Array which is empty if a block was given.
    def query(options)
      rv=[]
      required=options[:require]

      self.each_value do |entry|
        if options.has_key?(:predicate)
          raise 'predicates are disabled' if @@predicates_disabled
          next unless options[:predicate].call(entry)
        end

        selected=options.all? do |key,value|
          must_exist=required && required.include?(key)
          field_passes?(entry[key],value,must_exist)
        end
        next unless selected

        if block_given?
          yield entry
        else
          rv << entry
        end
      end
      rv
    end

    # Test one field against one queried value. A missing field passes
    # unless it is required.
    def field_passes?(field,value,must_exist)
      return !must_exist unless field

      # Regexp and String must be tested before include?: String responds
      # to include? too, which would test "query includes field".
      if value.kind_of?(Regexp)
        value =~ field.to_s
      elsif value.kind_of?(String)
        Regexp.new(value) =~ field.to_s
      elsif value.respond_to?(:include?)
        value.include?(value.kind_of?(Range) ? field.to_i : field)
      else
        Regexp.new(value.to_s) =~ field.to_s
      end
    end

    private :field_passes?

    # get index of all authors (array "firstname lastname" sorted by last word)
    def authors
      return @authors if @authors_time==@timestamp
      index={}
      self.each_value do |entry|
        entry.authors.each { |author| index[author]=true }
      end
      @authors_time=@timestamp
      # to_s keeps the comparison defined for empty author names
      @authors=index.keys.sort_by { |name| name.split.last.to_s }
    end


    #
    # apply standard substitutions (remove fields, adapt fields)
    #
    # change type (journal, ...) // remove technical reports
    #
    # make latex include (list of publications)
    # make bbl->html (homepage) -- associate picture
    # add category
    #
  end # BibTexData
651
  
652
653
  # Render BibTeXData entries to HTML through ERB templates.
  class Renderer

    # bibtexdata:: the collection the rendered entries belong to; it is
    #              asked to ensure_bbl before an entry's '$bbl' is used
    def initialize(bibtexdata)
      @db=bibtexdata
      @author_info = nil
      # get urls for entries
    end

    # Default template: authors, emphasized title, then the remaining
    # blocks of the reference separated by line breaks.
    DEFAULT_TEMPLATE=%q{
<%= bbl_authors %><br>
<em><%= bbl_title %></em><br>
<%= bbl_remainder.join("\n").gsub("\n",'<br>') %><br>
}.freeze

    # Convert entry to HTML using an ERB template.
    # - entry a BibTeXData::Entry
    # - erb_template an ERB object or a string defining an ERB template
    # - caller is accepted for compatibility and not used
    # If the template mentions "bbl", the method defines the variables
    # +bbl+ (Array of blocks), +bbl_authors+, +bbl_title+ and
    # +bbl_remainder+ (Array, possibly empty) from +entry['$bbl']+ and
    # raises a RuntimeError when the entry has no '$bbl' data.
    # The template is evaluated in this method's binding, so +entry+ is
    # available to it as well.
    def html(entry,erb_template=DEFAULT_TEMPLATE,caller=binding)
      if erb_template.kind_of?(ERB)
        template=erb_template
      else
        template=build_template(erb_template)
      end
      if template.src =~ /bbl/
        @db.ensure_bbl
        raw=entry['$bbl']
        raise "no bbl data for entry '#{entry['$id']}'" if raw.nil?
        bbl=BibTeX.latex2html(raw)
        bbl=bbl.sub(/^\\bibitem.*\n/,'')
        bbl=bbl.split(/\\newblock\s+/).map { |line| line.strip }
        bbl_authors=bbl[0]
        bbl_title=bbl[1]
        # slicing past the end yields nil; templates expect an Array
        bbl_remainder=bbl[2,bbl.length] || []
      end
      template.result(binding)
    end

    private

    # Compile a template string with trim mode '<>'. Newer ERB versions
    # accept the trim mode only as a keyword argument, older ones only as
    # the third positional argument, so pick whichever is supported.
    def build_template(source)
      init=ERB.instance_method(:initialize)
      keyword_api=init.respond_to?(:parameters) &&
        init.parameters.any? { |kind,name| kind==:key && name==:trim_mode }
      if keyword_api
        ERB.new(source, :trim_mode => '<>')
      else
        ERB.new(source,nil,'<>')
      end
    end

  end
692
 
693
694
  # extensions:
695
  # author info -> url bbl_html includes links
696
  # paper/project -> url ...
697
  # picture ...
698
    
699
  # extra info from file (comment, picture, url)
700
    
701
  # generate index (all publications, all authors) 
702
  
703
704
end # BibTex
705
706
707
=begin
708
709
* config directory
710
** generate author index
711
** generate key index
712
* read files with additional information (authors, general: ".attribute)
713
* default links: ask google (define "person_link","title_link")
714
715
=end