[cgi-wiki-dev] patch to CGI::Wiki
Tom Insam
cgi-wiki-dev@earth.li
Fri, 3 Sep 2004 16:34:38 +0100
--Apple-Mail-6--45638562
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
charset=US-ASCII;
format=flowed
This is a nasty first-cut patch to add proper character set support to
CGI::Wiki, as per my anguished rambling in #openguides today. It's
nasty. But then, frankly, CGI::Wiki could do with a bit of database
abstraction...
Good things about it: I don't think it'll change anything if you don't
use it. The default charset is iso-8859-1, which is the effective default
if you don't say anything else. Search::InvertedIndex is broken and won't
index stuff with the utf-8 flag set, so I index the raw bytes; again,
this means that current live data shouldn't be affected. Not that I've
tried this.
Bad things about it, apart from the ugliness: it requires perl 5.8
(the patch uses the Encode module). This is a very bad thing, but there
are ways of getting around it. New version soon. There's a work-around in
the patch for HTML::Parser's uselessness as well; this can go away once
the fix for http://rt.cpan.org/NoAuth/Bug.html?id=7014 has landed.
I have an example living at http://movieos.org/wiki/wiki.cgi - this is
a patched CGI::Wiki and a CGI::Wiki::Kwiki patched to use it. You'll
need a decent font. :-)
tom
--Apple-Mail-6--45638562
Content-Transfer-Encoding: 7bit
Content-Type: application/octet-stream;
x-unix-mode=0664;
name="CGI-Wiki_UTF8_patch"
Content-Disposition: attachment;
filename=CGI-Wiki_UTF8_patch
Only in .: .DS_Store
Only in .: Makefile
Only in .: blib
Only in ./lib: .DS_Store
Only in ./lib/CGI: .DS_Store
Only in ./lib/CGI/Wiki: .DS_Store
diff -ur /tmp/CGI-Wiki-0.54/lib/CGI/Wiki/Store/Database.pm ./lib/CGI/Wiki/Store/Database.pm
--- /tmp/CGI-Wiki-0.54/lib/CGI/Wiki/Store/Database.pm Fri Jun 25 20:26:14 2004
+++ ./lib/CGI/Wiki/Store/Database.pm Fri Sep 3 14:39:56 2004
@@ -10,6 +10,7 @@
use Time::Seconds;
use Carp qw( carp croak );
use Digest::MD5 qw( md5_hex );
+use Encode;
$VERSION = '0.22';
@@ -86,6 +87,7 @@
$self->{_dbuser} = $args{dbuser} || "";
$self->{_dbpass} = $args{dbpass} || "";
$self->{_dbhost} = $args{dbhost} || "";
+ $self->{_charset} = $args{charset} || "iso-8859-1";
# Connect to database and store the database handle.
my ($dbname, $dbuser, $dbpass, $dbhost) =
@@ -167,12 +169,12 @@
# specified in the call.
my $dbh = $self->dbh;
my $sql = "SELECT metadata_type, metadata_value FROM metadata WHERE "
- . "node=" . $dbh->quote($args{name}) . " AND "
- . "version=" . $dbh->quote($data{version});
+ . "node=" . $dbh->quote($self->charset_encode($args{name})) . " AND "
+ . "version=" . $dbh->quote($self->charset_encode($data{version}));
my $sth = $dbh->prepare($sql);
$sth->execute or croak $dbh->errstr;
my %metadata;
- while ( my ($type, $val) = $sth->fetchrow_array ) {
+ while ( my ($type, $val) = $self->charset_decode( $sth->fetchrow_array ) ) {
if ( defined $metadata{$type} ) {
push @{$metadata{$type}}, $val;
} else {
@@ -194,13 +196,13 @@
my $sql;
if ( $args{version} ) {
$sql = "SELECT text, version, modified FROM content"
- . " WHERE name=" . $dbh->quote($args{name})
- . " AND version=" . $dbh->quote($args{version});
+ . " WHERE name=" . $dbh->quote($self->charset_encode($args{name}))
+ . " AND version=" . $dbh->quote($self->charset_encode($args{version}));
} else {
$sql = "SELECT text, version, modified FROM node
- WHERE name=" . $dbh->quote($args{name});
+ WHERE name=" . $dbh->quote($self->charset_encode($args{name}));
}
- my @results = $dbh->selectrow_array($sql);
+ my @results = $self->charset_decode( $dbh->selectrow_array($sql) );
@results = ("", 0, "") unless scalar @results;
my %data;
@data{ qw( content version last_modified ) } = @results;
@@ -216,7 +218,7 @@
$string .= "\0\0\0" . $key . "\0\0"
. join("\0", sort @{$metadata{$key}} );
}
- return md5_hex($string);
+ return md5_hex($self->charset_encode($string));
}
# Expects an array of hashes whose keys and values are scalars.
@@ -293,7 +295,7 @@
my $sth = $dbh->prepare($sql);
$sth->execute or croak $dbh->errstr;
my @backlinks;
- while ( my $backlink = $sth->fetchrow_array ) {
+ while ( my ($backlink) = $self->charset_decode( $sth->fetchrow_array ) ) {
push @backlinks, $backlink;
}
return @backlinks;
@@ -320,7 +322,7 @@
my $sth = $dbh->prepare($sql);
$sth->execute or croak $dbh->errstr;
my @links;
- while ( my $link = $sth->fetchrow_array ) {
+ while ( my ($link) = $self->charset_decode( $sth->fetchrow_array ) ) {
push @links, $link;
}
return @links;
@@ -395,15 +397,15 @@
croak "Can't get version number" unless $version;
$version++;
$sql = "UPDATE node SET version=" . $dbh->quote($version)
- . ", text=" . $dbh->quote($content)
+ . ", text=" . $dbh->quote($self->charset_encode($content))
. ", modified=" . $dbh->quote($timestamp)
- . " WHERE name=" . $dbh->quote($node);
+ . " WHERE name=" . $dbh->quote($self->charset_encode($node));
$dbh->do($sql) or croak "Error updating database: " . DBI->errstr;
} else {
$version = 1;
$sql = "INSERT INTO node (name, version, text, modified)
VALUES ("
- . join(", ", map { $dbh->quote($_) }
+ . join(", ", map { $dbh->quote($self->charset_encode($_)) }
($node, $version, $content, $timestamp)
)
. ")";
@@ -413,7 +415,7 @@
# In either case we need to add to the history.
$sql = "INSERT INTO content (name, version, text, modified)
VALUES ("
- . join(", ", map { $dbh->quote($_) }
+ . join(", ", map { $dbh->quote($self->charset_encode($_)) }
($node, $version, $content, $timestamp)
)
. ")";
@@ -421,10 +423,10 @@
# And to the backlinks.
$dbh->do("DELETE FROM internal_links WHERE link_from="
- . $dbh->quote($node) ) or croak $dbh->errstr;
+ . $dbh->quote($self->charset_encode($node)) ) or croak $dbh->errstr;
foreach my $links_to ( @links_to ) {
$sql = "INSERT INTO internal_links (link_from, link_to) VALUES ("
- . join(", ", map { $dbh->quote($_) } ( $node, $links_to ) ) . ")";
+ . join(", ", map { $dbh->quote($self->charset_encode($_)) } ( $node, $links_to ) ) . ")";
# Better to drop a backlink or two than to lose the whole update.
# Shevek wants a case-sensitive wiki, Jerakeen wants a case-insensitive
# one, MySQL compares case-sensitively on varchars unless you add
@@ -457,7 +459,7 @@
foreach my $value ( @values ) {
my $sql = "INSERT INTO metadata "
. "(node, version, metadata_type, metadata_value) VALUES ("
- . join(", ", map { $dbh->quote($_) }
+ . join(", ", map { $dbh->quote($self->charset_encode($_)) }
( $node, $version, $type, $value )
)
. ")";
@@ -469,7 +471,7 @@
my $value_to_store = $self->_checksum_hashes( @values );
my $sql = "INSERT INTO metadata "
. "(node, version, metadata_type, metadata_value) VALUES ("
- . join(", ", map { $dbh->quote($_) }
+ . join(", ", map { $dbh->quote($self->charset_encode($_)) }
( $node, $version, $type_to_store, $value_to_store )
)
. ")";
@@ -843,7 +845,7 @@
my $sth = $dbh->prepare( "SELECT metadata_type, metadata_value
FROM metadata WHERE node=? AND version=?" );
$sth->execute( $find->{name}, $find->{version} );
- while ( my ($type, $value) = $sth->fetchrow_array ) {
+ while ( my ($type, $value) = $self->charset_decode( $sth->fetchrow_array ) ) {
if ( defined $metadata{$type} ) {
push @{$metadata{$type}}, $value;
} else {
@@ -869,7 +871,7 @@
my $dbh = $self->dbh;
my $sql = "SELECT name FROM node;";
my $nodes = $dbh->selectall_arrayref($sql);
- return ( map { $_->[0] } (@$nodes) );
+ return ( map { $self->charset_decode( $_->[0] ) } (@$nodes) );
}
=item B<list_nodes_by_metadata>
@@ -1011,6 +1013,30 @@
return if $self->{_external_dbh};
my $dbh = $self->dbh;
$dbh->disconnect if $dbh;
+}
+
+# Decode octets read from the database into Perl character strings, using the
+# charset we were constructed with. Takes a list, returns a list (list context).
+sub charset_decode {
+ my $self = shift;
+ my @input = @_;
+ my @output;
+ for (@input) {
+ push( @output, Encode::decode( $self->{_charset}, $_ ) );
+ }
+ return @output;
+}
+
+# Encode Perl character strings into octets, in the charset we were constructed
+# with, ready for the database. Takes a list, returns a list (list context).
+sub charset_encode {
+ my $self = shift;
+ my @input = @_;
+ my @output;
+ for (@input) {
+ push( @output, Encode::encode( $self->{_charset}, $_ ) );
+ }
+ return @output;
}
1;
Only in ./lib/CGI/Wiki: TestConfig.pm
Only in ./lib/CGI/Wiki: TestConfig.pm~
diff -ur /tmp/CGI-Wiki-0.54/lib/CGI/Wiki.pm ./lib/CGI/Wiki.pm
--- /tmp/CGI-Wiki-0.54/lib/CGI/Wiki.pm Fri Jun 25 20:29:34 2004
+++ ./lib/CGI/Wiki.pm Fri Sep 3 15:37:27 2004
@@ -474,8 +474,9 @@
sub search_nodes {
my ($self, @args) = @_;
+ my @terms = map { $self->store->charset_encode($_) } @args;
if ( $self->search_obj ) {
- $self->search_obj->search_nodes( @args );
+ $self->search_obj->search_nodes( @terms );
} else {
croak "No search backend defined.";
}
@@ -679,7 +680,7 @@
my $search = $self->{_search};
if ($search and $content) {
- $search->index_node($node, $content);
+ $search->index_node($node, $store->charset_encode($content) );
}
return 1;
}
@@ -700,7 +701,15 @@
my $formatter = $self->{_formatter};
# Add on $self to the call so the formatter can access things like whether
# a linked-to node exists, etc.
- return $formatter->format( $raw, $self, $metadata );
+ my $result = $formatter->format( $raw, $self, $metadata );
+
+ # Nasty hack to work around an HTML::Parser deficiency
+ use Encode;
+ if (Encode::is_utf8($raw)) {
+ Encode::_utf8_on( $result );
+ }
+
+ return $result;
}
=item B<store>
Only in .: pm_to_blib
Only in ./t: sii-db-file-test.db
Only in ./t: sqlite-test.db
--Apple-Mail-6--45638562--