Monday, July 14, 2008

使用Perl和MySQL找出书名中的中文词语

使用Perl找出书名中的中文词语，用于搜索引擎：

  1. 导入中文词库到MySQL数据库

    #!/usr/bin/perl
    # $Id$
    # Load a word list (one entry per line) from $input_file into the
    # `dict` table of the MySQL database configured below.
    use strict;
    use warnings;
    use DBI;

    my $database = 'zhu_tags_filter';
    my $hostname = '192.168.1.249';
    my $port = '3306';
    my $user = 'root';
    my $password = '123456';
    my $input_file = '/mnt/books/dict.txt';

    my $dsn = "DBI:mysql:database=$database;host=$hostname;port=$port";
    my $dbh = DBI->connect($dsn, $user, $password)
        or die "Couldn't connect to $dsn: $DBI::errstr";

    # Make the connection exchange text as UTF-8.
    $dbh->do("SET NAMES 'utf8'");

    open(my $input, '<', $input_file)
        or die "Couldn't open $input_file: $!";

    # Prepare once, execute per line; the placeholder keeps the word out
    # of the SQL text.
    my $insert = $dbh->prepare("INSERT INTO dict(name) VALUES(?)");

    while (my $word = <$input>) {
        # Strip the line terminator (LF or CRLF) so it is not stored as
        # part of the word.
        $word =~ s/\r?\n\z//;
        next if $word eq '';
        $insert->execute($word);
    }
    close($input);

    $dbh->disconnect();


  2. 匹配中文词库

    #!/usr/bin/perl
    # $Id$
    # Generate keywords from the Chinese dict and the book names:
    # every `dict.name` that occurs as a substring of at least one
    # `all_booknames.name` is inserted into the `keywords` table.
    use strict;
    use warnings;
    use DBI;

    my $db = 'zhu_tags_filter';
    my $host = '192.168.1.249';
    my $user = 'root';
    my $password = '123456';

    my $dbh = DBI->connect("DBI:mysql:database=$db;host=$host",
        $user, $password)
        or die "Couldn't connect to $db on $host: $DBI::errstr";
    $dbh->do("SET NAMES 'utf8'");

    my $sth = $dbh->prepare("SELECT name FROM dict");
    $sth->execute();

    # Prepared once and reused for every dictionary word; placeholders keep
    # the word out of the SQL text, so quotes in a word cannot break or
    # alter the statement.
    my $find_book = $dbh->prepare(
        "SELECT id FROM all_booknames WHERE name LIKE ? LIMIT 1");
    my $add_keyword = $dbh->prepare("INSERT INTO keywords (name) VALUES(?)");

    # A single SELECT produces one result set, so one fetch loop is enough.
    while (my ($keyword) = $sth->fetchrow_array()) {
        next unless defined $keyword;

        # Words stored with a trailing line terminator would never match.
        $keyword =~ s/[\r\n]+\z//;

        # An empty pattern ('%%') would match every book name.
        next if $keyword eq '';

        # Escape LIKE metacharacters so the word is matched literally
        # (backslash is MySQL's default LIKE escape character).
        (my $literal = $keyword) =~ s/([\\%_])/\\$1/g;

        # Fetch the row instead of relying on ->rows, which DBI does not
        # guarantee for SELECT statements.
        my ($book_id) = $dbh->selectrow_array($find_book, undef, "%$literal%");
        if (defined $book_id) {
            $add_keyword->execute($keyword);
        }
    }

    $sth->finish;
    $dbh->disconnect();


No comments:

Post a Comment