#!/usr/bin/perl -T
#
# pitchfork reviews project
# cgi to generate concordances for a given word (or pair of words!)
#
# loren jan wilson, december 2003?
#
use strict;
use CGI qw(:standard);
use CGI::Carp qw(fatalsToBrowser);
use DBI;
use Reviews;
$CGI::POST_MAX=1024 * 15; # max 20K posts
$CGI::DISABLE_UPLOADS = 1; # no uploads
$ENV{PATH} = "";
my $q = new CGI;
print $q->header(-expires=>'now');
print $q->start_html();
my $begin_time = time();
my $dbh = make_db_handle;
my $id = $q->param('id') || 100;
$id =~ s/[^\d]//g;
$id = substr($id, 0, 10);
my $word_id = $id;
my $id2 = $q->param('id2') || "";
$id2 =~ s/[^\d]//g;
$id2 = substr($id2, 0, 10);
my $word_id2 = $id2;
my $limit = $q->param('limit') || 50; #only return 100 at a time
$limit =~ s/[^\d]//g;
$limit = substr($limit, 0, 4);
my $offset = $q->param('offset') || 0;
$offset =~ s/[^\d]//g;
$offset = substr($offset, 0, 7);
# get word and print it at the top
my $word = dbquery($dbh, qq/
SELECT word FROM word WHERE id = $word_id
/);
my $word2 = "This word will never occur";
if ($id2) {
$word2 = dbquery($dbh, qq/
SELECT word FROM word WHERE id = $word_id2
/);
$limit = 5000;
}
my $wordcount = dbquery($dbh, qq/
SELECT count(word_id) FROM word_sentence WHERE word_id = $word_id
/);
my $firstnum = $offset+1;
my $lastnum = $limit+$offset;
if ($word2 eq "This word will never occur") {
$word2 = " ";
}
print "
$word $word2
\nShowing sentences $firstnum - $lastnum; there are $wordcount total occurrences in the database.";
my $new_url = $q->url(-relative=>1, -query=>1);
if ($new_url =~ /offset=(\d+)/) {
my $new_offset = $1 + $limit;
$new_url =~ s/offset=(\d+)/offset=$new_offset/;
} else {
my $new_offset = $offset + $limit;
$new_url = $new_url.";offset=$new_offset";
}
print qq%
next $limit>>%;
print "\n";
# get sentences and print them one at a time
# sort the sentences by date and then sentence_id.
# also get information from articles for each sentence..?!
my @fields = qw(sentence.id sentence.beginchar sentence.length band.name album.name label.name rating_id author.id author.name article.id article.date paragraph.position paragraph.text);
my $selectstring = join(",", @fields);
my $sentences = dbquery($dbh,qq/
SELECT $selectstring
FROM word_sentence,sentence,paragraph,article,album,band,label,author
WHERE word_id = $word_id
AND sentence_id = sentence.id
AND paragraph_id = paragraph.id
AND article_id = article.id
AND album_id = album.id
AND band_id = band.id
AND label_id = label.id
AND author_id = author.id
ORDER BY date DESC, sentence_id
LIMIT $offset,$limit
/);
# if we only have one word, there is an error...this is the workaround
if (! ref $$sentences[0]) { $sentences = [ $sentences ]; }
# print all the stuff...
my %seen_sentence_id;
my @ratings;
foreach my $sentence (@$sentences) {
my %sentence;
if (! ref $sentence) { $sentence = [ $sentence ]; }
# load values into hash using items from fields as keys
foreach my $i ( 0 .. $#fields ) {
$sentence{$fields[$i]} = $$sentence[$i];
}
# don't print a sentence twice
next if $seen_sentence_id{$sentence{'sentence.id'}};
# find out whether the second word is also in this sentence
my $sentence2 = "";
if ($word_id2) {
$sentence2 = dbquery($dbh, qq/
SELECT word_id FROM word_sentence
WHERE word_id = $word_id2
AND sentence_id = $sentence{'sentence.id'}
LIMIT 1
/);
next if (! $sentence2);
}
# parse sentence text from paragraph.text using beginchar and length
my $offset = $sentence{"sentence.beginchar"} - 1;
my $length = $sentence{"sentence.length"};
my $text = $sentence{"paragraph.text"};
my $sentence_text = substr($text,$offset,$length);
# highlight the word in the text
$sentence_text =~ s/([^\w])${word}([^\w])/$1${word}<\/b>$2/g;
$sentence_text =~ s/([^\w])${word2}([^\w])/$1${word2}<\/b>$2/g;
# print band.name: album.name [label.name; rating:rating_id/100]
# by author.name (author.id) on article.date ...(article.id)
my $date = $sentence{"article.date"};
($date = "date unknown") if (! $date);
print "- ";
print qq();
print qq($sentence{"band.name"}: );
print qq($sentence{"album.name"} );
print qq([$sentence{"label.name"}; rating $sentence{"rating_id"}/100] );
print qq($sentence{"author.name"}, $date);
print "
\n";
print qq/$sentence_text (p.$sentence{"paragraph.position"})/;
print "\n";
# collect this rating
push @ratings, $sentence{'rating_id'};
# mark this sentence as printed and move on
$seen_sentence_id{$sentence{'sentence.id'}} = 1;
}
# calculate average rating
my $total_rating;
my $num_of_ratings;
foreach my $rating (@ratings) {
$total_rating = $total_rating + $rating;
$num_of_ratings++;
}
my $avg_rating = $total_rating / $num_of_ratings;
print "\n
";
print "\naverage rating: $avg_rating";
my $end_time = time();
my $time_elapsed = $end_time - $begin_time;
print "\n
query time: $time_elapsed seconds\n";
print $q->end_html();