[cgi-wiki-dev] patch to CGI::Wiki
Tom Insam
cgi-wiki-dev@earth.li
Mon, 13 Sep 2004 21:07:53 +0100
--Apple-Mail-7-834756576
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
charset=US-ASCII;
format=flowed
On Sep 3, 2004, at 18:44, Kake L Pugh wrote:
> On Fri 03 Sep 2004, Tom Insam <tom@jerakeen.org> wrote:
>> This is a nasty first-cut patch to add proper character set support to
>> CGI::Wiki, as per my anguished rambling in #openguides today.
> OK - nothing is going to get done about this until I get back from
> holiday in a week, so if you want to tidy it up/add anything
> else/abstract it a bit more, you have until then. *Mail me again no
> sooner than a week on Monday*.
>
>> bad things about it: apart from the ugliness? It requires perl 5.8.
>
> Yes, you need to fix that :)
And I've fixed it. Attached is a patch that adds charset support to
CGI::Wiki and falls back to the old behaviour (no encoding or decoding)
if you are running under perl 5.6, where Encode is not available.
The current CVS CGI::Wiki::Kwiki takes advantage of this, and new
C::W::K installs will default to being full UTF-8 wikis. Imports from
CGI::Kwiki wikis that contain non-ASCII UTF-8 characters seem to work,
and you can put high-bit characters into pages.
tom
--Apple-Mail-7-834756576
Content-Type: multipart/appledouble;
boundary=Apple-Mail-8-834756576
Content-Disposition: attachment
--Apple-Mail-8-834756576
Content-Transfer-Encoding: base64
Content-Type: application/applefile;
name="cgi_wiki.diff"
Content-Disposition: attachment;
filename=cgi_wiki.diff
AAUWBwACAAAAAAAAAAAAAAAAAAAAAAAAAAMAAAAJAAAAPgAAAAoAAAADAAAASAAAAA0AAAACAAAA
VQAABq5URVhUUipjaAAAY2dpX3dpa2kuZGlmZgAAAQAAAAZoAAAFaAAAAEYAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AABIAAlNb25hY28AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAAIALAAiA1YCogAsACIDVgKi
vWu8CwAAJ+oAACfqAAAg9wEBAAAFGFIqY2gAhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABB0NvdXJpZXIAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAJAAAABAlIZWx2ZXRpY2EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMQ29uZmlkZW50aWFsAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAABAAAAAQABAAABgAAAAIAAAACAAAAAgAAAAAAAAQEBAAEBAQAAAAACAFAB
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKaXNvLTg4NTktMQAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAABAAAABmgAAAVoAAAARgBgUIwFRgAAABwARgABTVBTUgAAABJCQlNU
AAAAHgPt//8AAAAAAH+bkACA//8AAABMAH+bpA==
--Apple-Mail-8-834756576
Content-Transfer-Encoding: 7bit
Content-Type: application/text;
x-mac-type=54455854;
x-unix-mode=0644;
x-mac-creator=522A6368;
name="cgi_wiki.diff"
Content-Disposition: attachment;
filename=cgi_wiki.diff
diff -ur CGI-Wiki-0.54/lib/CGI/Wiki/Store/Database.pm CGI-Wiki-changed/lib/CGI/Wiki/Store/Database.pm
--- CGI-Wiki-0.54/lib/CGI/Wiki/Store/Database.pm Fri Jun 25 20:26:14 2004
+++ CGI-Wiki-changed/lib/CGI/Wiki/Store/Database.pm Mon Sep 13 20:12:22 2004
@@ -13,6 +13,17 @@
$VERSION = '0.22';
+# first, detect if Encode is available - it's not under 5.6. If we _are_
+# under 5.6, give up - we'll just have to hope that nothing explodes. This
+# is the current 0.54 behaviour, so that's ok.
+
+my $CAN_USE_ENCODE;
+BEGIN {
+ eval " use Encode ";
+ $CAN_USE_ENCODE = $@ ? 0 : 1;
+}
+
+
=head1 NAME
CGI::Wiki::Store::Database - parent class for database storage backends
@@ -86,6 +97,7 @@
$self->{_dbuser} = $args{dbuser} || "";
$self->{_dbpass} = $args{dbpass} || "";
$self->{_dbhost} = $args{dbhost} || "";
+ $self->{_charset} = $args{charset} || "iso-8859-1";
# Connect to database and store the database handle.
my ($dbname, $dbuser, $dbpass, $dbhost) =
@@ -167,12 +179,12 @@
# specified in the call.
my $dbh = $self->dbh;
my $sql = "SELECT metadata_type, metadata_value FROM metadata WHERE "
- . "node=" . $dbh->quote($args{name}) . " AND "
- . "version=" . $dbh->quote($data{version});
+ . "node=" . $dbh->quote($self->charset_encode($args{name})) . " AND "
+ . "version=" . $dbh->quote($self->charset_encode($data{version}));
my $sth = $dbh->prepare($sql);
$sth->execute or croak $dbh->errstr;
my %metadata;
- while ( my ($type, $val) = $sth->fetchrow_array ) {
+ while ( my ($type, $val) = $self->charset_decode( $sth->fetchrow_array ) ) {
if ( defined $metadata{$type} ) {
push @{$metadata{$type}}, $val;
} else {
@@ -194,13 +206,13 @@
my $sql;
if ( $args{version} ) {
$sql = "SELECT text, version, modified FROM content"
- . " WHERE name=" . $dbh->quote($args{name})
- . " AND version=" . $dbh->quote($args{version});
+ . " WHERE name=" . $dbh->quote($self->charset_encode($args{name}))
+ . " AND version=" . $dbh->quote($self->charset_encode($args{version}));
} else {
$sql = "SELECT text, version, modified FROM node
- WHERE name=" . $dbh->quote($args{name});
+ WHERE name=" . $dbh->quote($self->charset_encode($args{name}));
}
- my @results = $dbh->selectrow_array($sql);
+ my @results = $self->charset_decode( $dbh->selectrow_array($sql) );
@results = ("", 0, "") unless scalar @results;
my %data;
@data{ qw( content version last_modified ) } = @results;
@@ -216,7 +228,7 @@
$string .= "\0\0\0" . $key . "\0\0"
. join("\0", sort @{$metadata{$key}} );
}
- return md5_hex($string);
+ return md5_hex($self->charset_encode($string));
}
# Expects an array of hashes whose keys and values are scalars.
@@ -293,7 +305,7 @@
my $sth = $dbh->prepare($sql);
$sth->execute or croak $dbh->errstr;
my @backlinks;
- while ( my $backlink = $sth->fetchrow_array ) {
+ while ( my ($backlink) = $self->charset_decode( $sth->fetchrow_array ) ) {
push @backlinks, $backlink;
}
return @backlinks;
@@ -320,7 +332,7 @@
my $sth = $dbh->prepare($sql);
$sth->execute or croak $dbh->errstr;
my @links;
- while ( my $link = $sth->fetchrow_array ) {
+ while ( my ($link) = $self->charset_decode( $sth->fetchrow_array ) ) {
push @links, $link;
}
return @links;
@@ -395,15 +407,15 @@
croak "Can't get version number" unless $version;
$version++;
$sql = "UPDATE node SET version=" . $dbh->quote($version)
- . ", text=" . $dbh->quote($content)
+ . ", text=" . $dbh->quote($self->charset_encode($content))
. ", modified=" . $dbh->quote($timestamp)
- . " WHERE name=" . $dbh->quote($node);
+ . " WHERE name=" . $dbh->quote($self->charset_encode($node));
$dbh->do($sql) or croak "Error updating database: " . DBI->errstr;
} else {
$version = 1;
$sql = "INSERT INTO node (name, version, text, modified)
VALUES ("
- . join(", ", map { $dbh->quote($_) }
+ . join(", ", map { $dbh->quote($self->charset_encode($_)) }
($node, $version, $content, $timestamp)
)
. ")";
@@ -413,7 +425,7 @@
# In either case we need to add to the history.
$sql = "INSERT INTO content (name, version, text, modified)
VALUES ("
- . join(", ", map { $dbh->quote($_) }
+ . join(", ", map { $dbh->quote($self->charset_encode($_)) }
($node, $version, $content, $timestamp)
)
. ")";
@@ -421,10 +433,10 @@
# And to the backlinks.
$dbh->do("DELETE FROM internal_links WHERE link_from="
- . $dbh->quote($node) ) or croak $dbh->errstr;
+ . $dbh->quote($self->charset_encode($node)) ) or croak $dbh->errstr;
foreach my $links_to ( @links_to ) {
$sql = "INSERT INTO internal_links (link_from, link_to) VALUES ("
- . join(", ", map { $dbh->quote($_) } ( $node, $links_to ) ) . ")";
+ . join(", ", map { $dbh->quote($self->charset_encode($_)) } ( $node, $links_to ) ) . ")";
# Better to drop a backlink or two than to lose the whole update.
# Shevek wants a case-sensitive wiki, Jerakeen wants a case-insensitive
# one, MySQL compares case-sensitively on varchars unless you add
@@ -457,7 +469,7 @@
foreach my $value ( @values ) {
my $sql = "INSERT INTO metadata "
. "(node, version, metadata_type, metadata_value) VALUES ("
- . join(", ", map { $dbh->quote($_) }
+ . join(", ", map { $dbh->quote($self->charset_encode($_)) }
( $node, $version, $type, $value )
)
. ")";
@@ -469,7 +481,7 @@
my $value_to_store = $self->_checksum_hashes( @values );
my $sql = "INSERT INTO metadata "
. "(node, version, metadata_type, metadata_value) VALUES ("
- . join(", ", map { $dbh->quote($_) }
+ . join(", ", map { $dbh->quote($self->charset_encode($_)) }
( $node, $version, $type_to_store, $value_to_store )
)
. ")";
@@ -843,7 +855,7 @@
my $sth = $dbh->prepare( "SELECT metadata_type, metadata_value
FROM metadata WHERE node=? AND version=?" );
$sth->execute( $find->{name}, $find->{version} );
- while ( my ($type, $value) = $sth->fetchrow_array ) {
+ while ( my ($type, $value) = $self->charset_decode( $sth->fetchrow_array ) ) {
if ( defined $metadata{$type} ) {
push @{$metadata{$type}}, $value;
} else {
@@ -869,7 +881,7 @@
my $dbh = $self->dbh;
my $sql = "SELECT name FROM node;";
my $nodes = $dbh->selectall_arrayref($sql);
- return ( map { $_->[0] } (@$nodes) );
+ return ( map { $self->charset_decode( $_->[0] ) } (@$nodes) );
}
=item B<list_nodes_by_metadata>
@@ -1011,6 +1023,36 @@
return if $self->{_external_dbh};
my $dbh = $self->dbh;
$dbh->disconnect if $dbh;
+}
+
+# decode a string of octets into perl's internal encoding, based on the
+# charset parameter we were passed. Takes a list, returns a list.
+sub charset_decode {
+ my $self = shift;
+ my @input = @_;
+ if ($CAN_USE_ENCODE) {
+ my @output;
+ for (@input) {
+ push( @output, Encode::decode( $self->{_charset}, $_ ) );
+ }
+ return @output;
+ }
+ return @input;
+}
+
+# convert a perl string into a series of octets we can put into the database
+# takes a list, returns a list
+sub charset_encode {
+ my $self = shift;
+ my @input = @_;
+ if ($CAN_USE_ENCODE) {
+ my @output;
+ for (@input) {
+ push( @output, Encode::encode( $self->{_charset}, $_ ) );
+ }
+ return @output;
+ }
+ return @input;
}
1;
diff -ur CGI-Wiki-0.54/lib/CGI/Wiki.pm CGI-Wiki-changed/lib/CGI/Wiki.pm
--- CGI-Wiki-0.54/lib/CGI/Wiki.pm Fri Jun 25 20:29:34 2004
+++ CGI-Wiki-changed/lib/CGI/Wiki.pm Mon Sep 13 21:03:56 2004
@@ -8,6 +8,17 @@
use Carp qw(croak carp);
use Digest::MD5 "md5_hex";
+# first, detect if Encode is available - it's not under 5.6. If we _are_
+# under 5.6, give up - we'll just have to hope that nothing explodes. This
+# is the current 0.54 behaviour, so that's ok.
+
+my $CAN_USE_ENCODE;
+BEGIN {
+ eval " use Encode ";
+ $CAN_USE_ENCODE = $@ ? 0 : 1;
+}
+
+
=head1 NAME
CGI::Wiki - A toolkit for building Wikis.
@@ -474,8 +485,9 @@
sub search_nodes {
my ($self, @args) = @_;
+ my @terms = map { $self->store->charset_encode($_) } @args;
if ( $self->search_obj ) {
- $self->search_obj->search_nodes( @args );
+ $self->search_obj->search_nodes( @terms );
} else {
croak "No search backend defined.";
}
@@ -657,6 +669,7 @@
$checksum = md5_hex("") unless defined $checksum;
my $formatter = $self->{_formatter};
+
my @links_to;
if ( $formatter->can( "find_internal_links" ) ) {
# Supply $metadata to formatter in case it's needed to alter the
@@ -679,7 +692,7 @@
my $search = $self->{_search};
if ($search and $content) {
- $search->index_node($node, $content);
+ $search->index_node($node, $store->charset_encode($content) );
}
return 1;
}
@@ -700,7 +713,17 @@
my $formatter = $self->{_formatter};
# Add on $self to the call so the formatter can access things like whether
# a linked-to node exists, etc.
- return $formatter->format( $raw, $self, $metadata );
+ my $result = $formatter->format( $raw, $self, $metadata );
+
+ # Nasty hack to work around an HTML::Parser deficiency
+ # see http://rt.cpan.org/NoAuth/Bug.html?id=7014
+ if ($CAN_USE_ENCODE) {
+ if (Encode::is_utf8($raw)) {
+ Encode::_utf8_on( $result );
+ }
+ }
+
+ return $result;
}
=item B<store>
--Apple-Mail-8-834756576--
--Apple-Mail-7-834756576--