Commit 53fd6325 authored by Sergei Golubchik

remove groonga examples

to follow the similar upstream change
parent 1815719a
# Automake input for the dictionary example.

# Sub-directories that are built/installed recursively, one per dictionary.
SUBDIRS = \
edict \
eijiro \
gene95 \
jmdict
# Script that is distributed and installed into $(examples_dictionarydir)
# (that directory variable is defined outside this fragment).
dist_examples_dictionary_SCRIPTS = \
init-db.sh
# Data files that are distributed and installed; the nobase_ prefix keeps
# their relative sub-directory (html/...) on installation.
nobase_dist_examples_dictionary_DATA = \
readme.txt \
$(html_files)
# Files of the bundled web UI. The command below prints the list in the
# format used here (presumably how the list was generated):
# find html -type f | sort | sed -e 's,^,\t,g'
html_files = \
html/css/dictionary.css \
html/css/smoothness/images/ui-bg_flat_0_aaaaaa_40x100.png \
html/css/smoothness/images/ui-bg_flat_75_ffffff_40x100.png \
html/css/smoothness/images/ui-bg_glass_55_fbf9ee_1x400.png \
html/css/smoothness/images/ui-bg_glass_65_ffffff_1x400.png \
html/css/smoothness/images/ui-bg_glass_75_dadada_1x400.png \
html/css/smoothness/images/ui-bg_glass_75_e6e6e6_1x400.png \
html/css/smoothness/images/ui-bg_glass_95_fef1ec_1x400.png \
html/css/smoothness/images/ui-bg_highlight-soft_75_cccccc_1x100.png \
html/css/smoothness/images/ui-icons_222222_256x240.png \
html/css/smoothness/images/ui-icons_2e83ff_256x240.png \
html/css/smoothness/images/ui-icons_454545_256x240.png \
html/css/smoothness/images/ui-icons_888888_256x240.png \
html/css/smoothness/images/ui-icons_cd0a0a_256x240.png \
html/css/smoothness/jquery-ui-1.8.12.custom.css \
html/index.html \
html/js/dictionary.js \
html/js/jquery-1.7.2.js \
html/js/jquery-ui-1.8.18.custom.js
# Install directory of the EDICT importer: the "edict" sub-directory of
# $(examples_dictionarydir) (defined outside this fragment).
edictdir = $(examples_dictionarydir)/edict
# Scripts that are distributed and installed into $(edictdir).
dist_edict_SCRIPTS = \
edict2grn.rb \
edict-import.sh
#!/bin/sh
# Import the EDICT dictionary into a groonga database.
#
# usage: edict-import.sh db_path [edict.gz_path]
#
#   db_path        groonga database to load the data into
#   edict.gz_path  gzip-compressed EDICT file; when omitted, ./edict.gz is
#                  used and downloaded first if it does not exist yet
#
# Exits with status 1 on a usage error, a failed download or a failed load.

base_dir=$(dirname "$0")

if [ 1 != $# ] && [ 2 != $# ]; then
  echo "usage: $0 db_path [edict.gz_path]"
  exit 1
fi

if [ -z "$2" ]; then
  edict_gz=edict.gz
  if [ ! -f "$edict_gz" ]; then
    # wget -O creates the output file even when the download fails. Remove
    # the leftover so that the next run downloads again instead of silently
    # using an empty or truncated file.
    if ! wget -O "$edict_gz" http://ftp.monash.edu.au/pub/nihongo/edict.gz; then
      rm -f "$edict_gz"
      echo "failed to download edict.gz" >&2
      exit 1
    fi
  fi
else
  edict_gz=$2
fi

# Use gzcat when it is available, otherwise fall back to zcat.
# NOTE(review): presumably because zcat does not read .gz files on every
# platform -- confirm.
if type gzcat > /dev/null 2>&1; then
  zcat="gzcat"
else
  zcat="zcat"
fi

# The exit status of the pipeline is the one of groonga (its last command).
if "$zcat" "$edict_gz" | "${base_dir}/edict2grn.rb" | groonga "$1" > /dev/null; then
  echo "edict data loaded."
else
  echo "failed to load edict data." >&2
  exit 1
fi
#!/usr/bin/env ruby
# Convert an EDICT dictionary (EUC-JP text read via Kernel#gets) into groonga
# commands written to standard output: two column_create commands followed
# by a "load" command whose JSON rows are [_key, edict_desc, kana].
#
# Every input line has the form "KEY [READING] /BODY"; the "[READING]" part
# is optional. Lines that cannot be converted from EUC-JP are reported on
# standard error and skipped; blank lines are skipped silently.

require "English"
require "nkf"
require "json"

# The header is printed without its final newline because every record
# below starts by terminating the previous line with ",".
print(<<HEADER.chomp)
column_create item_dictionary edict_desc COLUMN_SCALAR ShortText
column_create bigram item_dictionary_edict_desc COLUMN_INDEX|WITH_POSITION item_dictionary edict_desc
load --table item_dictionary
[
["_key","edict_desc","kana"]
HEADER

loop do
  raw_line = gets
  break if raw_line.nil?

  begin
    line = raw_line.encode("UTF-8", "EUC-JP").strip
  rescue EncodingError
    # Do not abort the whole import because of one broken line.
    $stderr.puts("Ignore:")
    $stderr.puts(" line: <#{raw_line}>")
    next
  end
  # A blank line has no key at all; splitting it would yield nil.
  next if line.empty?

  key, body = line.split("/", 2)
  key = key.strip
  reading_match = /\s*\[(.+)\]\z/.match(key)
  if reading_match
    # "KEY [READING]": keep the reading in the description and use it,
    # converted to katakana, as the value of the kana column.
    key = reading_match.pre_match
    reading = reading_match[1]
    body = "[#{reading}] #{body}"
    kana = NKF.nkf("-Ww --katakana", reading)
  else
    kana = NKF.nkf("-Ww --katakana", key)
  end

  puts(",")
  puts([key, body, kana].to_json)
end

puts
puts("]")
# Install directory of the Eijiro importer: the "eijiro" sub-directory of
# $(examples_dictionarydir) (defined outside this fragment).
eijirodir = $(examples_dictionarydir)/eijiro
# Scripts that are distributed and installed into $(eijirodir).
dist_eijiro_SCRIPTS = \
eijiro2grn.rb \
eijiro-import.sh
#!/bin/sh
# Import an Eijiro dictionary exported as CSV into a groonga database.
#
# usage: eijiro-import.sh db_path eijiro.csv_path
#
#   db_path          groonga database to load the data into
#   eijiro.csv_path  CSV export of the dictionary, encoded in UCS-2
#
# Exits with status 1 on a usage error or when loading fails.

base_dir=$(dirname "$0")

if [ 2 != $# ]; then
  echo "usage: $0 db_path eijiro.csv_path"
  exit 1
fi

# Convert the CSV to UTF-8, turn it into groonga commands and load them.
# The exit status of the pipeline is the one of groonga (its last command).
if iconv -f UCS2 -t UTF8 "$2" | "${base_dir}/eijiro2grn.rb" | groonga "$1" > /dev/null; then
  echo "eijiro data loaded."
else
  echo "failed to load eijiro data." >&2
  exit 1
fi
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
$KCODE = 'u'
require 'rubygems'
require 'fastercsv'
class String
  # Serialize the string as a JSON string literal (including the quotes).
  #
  # Double quotes, backslashes and the backspace, form feed, newline and tab
  # characters are written with their short JSON escapes. Carriage returns
  # are dropped. Any other control character (U+0000..U+001F) is written as
  # a \u00XX escape, because JSON does not allow raw control characters
  # inside a string. Everything else is copied unchanged.
  #
  # Returns a new String.
  def to_json
    escaped = split(//).map do |char|
      case char
      when '"' then '\\"'
      when '\\' then '\\\\'
      when "\b" then '\b'
      when "\f" then '\f'
      when "\n" then '\n'
      when "\r" then ''
      when "\t" then '\t'
      when /[\x00-\x1f]/ then format('\u%04x', char.ord)
      else char
      end
    end
    "\"#{escaped.join('')}\""
  end
end
class Array
  # Serialize the array as a JSON array: the result of calling to_json on
  # every element, separated by commas and wrapped in square brackets.
  #
  # Returns a new String.
  def to_json
    serialized = []
    each { |item| serialized << item.to_json }
    "[#{serialized.join(',')}]"
  end
end
# Print the groonga schema for the Eijiro columns and open the "load"
# command; its first JSON row is the list of column names.
puts <<END
column_create item_dictionary eijiro_trans COLUMN_SCALAR ShortText
column_create item_dictionary eijiro_exp COLUMN_SCALAR ShortText
column_create item_dictionary eijiro_level COLUMN_SCALAR Int32
column_create item_dictionary eijiro_memory COLUMN_SCALAR Int32
column_create item_dictionary eijiro_modify COLUMN_SCALAR Int32
column_create item_dictionary eijiro_pron COLUMN_SCALAR ShortText
column_create item_dictionary eijiro_filelink COLUMN_SCALAR ShortText
column_create bigram item_dictionary_eijiro_trans COLUMN_INDEX|WITH_POSITION item_dictionary eijiro_trans
load --table item_dictionary
[["_key","norm","eijiro_trans","eijiro_exp","eijiro_level","eijiro_memory","eijiro_modify","eijiro_pron","eijiro_filelink","kana"],
END

# Read the CSV from ARGF and print one JSON row per record. The first CSV
# row is not printed (presumably it is the column header of the export).
rows_seen = 0
FasterCSV.new(ARGF, :row_sep => "\r\n").each do |row|
  rows_seen += 1
  next if rows_seen == 1

  keyword, word, trans, exp, level, memory, modify, pron, filelink = row

  # The text between the 【@】 marker and the next 【 (or the end of the
  # line) of the translation is split on "、" and used as the kana value;
  # without the marker the kana value is an empty string.
  kana = if trans =~ /【@】(.*?)(【|$)/
           $1.split("、")
         else
           ''
         end

  # Missing (nil) fields are written as empty strings.
  fields = [word, keyword, trans, exp, level, memory, modify, pron, filelink, kana]
  puts fields.map { |field| field || '' }.to_json
end

puts "]"
# Install directory of the GENE95 importer: the "gene95" sub-directory of
# $(examples_dictionarydir) (defined outside this fragment).
gene95dir = $(examples_dictionarydir)/gene95
# Scripts that are distributed and installed into $(gene95dir).
dist_gene95_SCRIPTS = \
gene2grn.rb \
gene-import.sh
#!/bin/sh
# Import the GENE95 dictionary into a groonga database.
#
# usage: gene-import.sh db_path [gene.txt_path]
#
#   db_path        groonga database to load the data into
#   gene.txt_path  extracted gene.txt; when omitted,
#                  ./gene95-dictionary/gene.txt is used, and gene95.tar.gz is
#                  downloaded and extracted first if that file does not exist
#
# Exits with status 1 on a usage error, a failed download or extraction,
# or a failed load.

base_dir=$(dirname "$0")

if [ 1 != $# ] && [ 2 != $# ]; then
  echo "usage: $0 db_path [gene.txt_path]"
  exit 1
fi

if [ -z "$2" ]; then
  dictionary_dir=gene95-dictionary
  gene_txt=${dictionary_dir}/gene.txt
  if [ ! -f "$gene_txt" ]; then
    gene95_tar_gz=gene95.tar.gz
    # wget -O creates the output file even when the download fails; remove
    # the leftover so that a broken archive is never kept around.
    if ! wget -O "$gene95_tar_gz" \
      http://www.namazu.org/~tsuchiya/sdic/data/gene95.tar.gz; then
      rm -f "$gene95_tar_gz"
      echo "failed to download gene95.tar.gz" >&2
      exit 1
    fi
    mkdir -p "${dictionary_dir}"
    if ! tar xvzf "${gene95_tar_gz}" -C "${dictionary_dir}"; then
      echo "failed to extract ${gene95_tar_gz}" >&2
      exit 1
    fi
  fi
else
  gene_txt=$2
fi

# The exit status of the pipeline is the one of groonga (its last command).
if "${base_dir}/gene2grn.rb" < "$gene_txt" | groonga "$1" > /dev/null; then
  echo "gene95 data loaded."
else
  echo "failed to load gene95 data." >&2
  exit 1
fi
#!/usr/bin/env ruby
# Convert a GENE95 dictionary (Windows-31J text read via Kernel#gets) into
# groonga commands written to standard output: two column_create commands
# followed by a "load" command whose JSON rows are [_key, gene95_desc].
#
# The input is read as pairs of lines: a key line followed by a body line.
# Pairs that cannot be converted from Windows-31J, and a trailing key line
# without a body line, are reported on standard error and skipped.

require "json"

# The header is printed without its final newline because every record
# below starts by terminating the previous line with ",".
print(<<HEADER.chomp)
column_create item_dictionary gene95_desc COLUMN_SCALAR ShortText
column_create bigram item_dictionary_gene95_desc COLUMN_INDEX|WITH_POSITION item_dictionary gene95_desc
load --table item_dictionary
[
["_key","gene95_desc"]
HEADER

loop do
  raw_key = gets
  break if raw_key.nil?

  raw_body = gets
  if raw_body.nil?
    # Odd number of lines: the last key has no body, so there is nothing
    # to load for it and no more input to read.
    $stderr.puts("Ignore:")
    $stderr.puts(" key: <#{raw_key}>")
    $stderr.puts(" body: <>")
    break
  end

  begin
    key = raw_key.encode("UTF-8", "Windows-31J").strip
    body = raw_body.encode("UTF-8", "Windows-31J").strip
  rescue EncodingError
    # Do not abort the whole import because of one broken pair.
    $stderr.puts("Ignore:")
    $stderr.puts(" key: <#{raw_key}>")
    $stderr.puts(" body: <#{raw_body}>")
    next
  end

  puts(",")
  print([key, body].to_json)
end

puts
puts("]")
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
  Search page of the groonga dictionary example: a single text box with
  jQuery UI autocomplete; matching entries are rendered into #result by
  js/dictionary.js.
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="ja" lang="ja">
<head>
<meta http-equiv="Content-Language" content="ja" />
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>groonga dictionary search</title>
<meta http-equiv="content-style-type" content="text/css" />
<meta http-equiv="content-script-type" content="text/javascript" />
<link type="text/css" href="css/smoothness/jquery-ui-1.8.12.custom.css" rel="stylesheet" />
<link type="text/css" rel="stylesheet" href="css/dictionary.css" />
</head>
<body>
<!-- Submitting the form only removes the focus from the search box. -->
<form action="javascript:(function(){$('.search').blur()})()" name="search" id="search">
<input type="text" size="60" maxlength="60" name="key" class="search" />
<input type="submit" value="検索"/>
</form>
<script type="text/javascript" src="js/jquery-1.7.2.js"></script>
<script type="text/javascript" src="js/jquery-ui-1.8.18.custom.js"></script>
<script type="text/javascript" src="js/dictionary.js"></script>
<script type="text/javascript">
$(document).ready(function(){
  // Query the suggest API of the server that delivered this page. The
  // scheme of the page is reused so that the request also works when the
  // page is served over https.
  $(".search").autocomplete({source: dictionarySource(location.protocol + "//" + location.host + "/d/suggest")});
});
</script>
<!-- Filled with the matching dictionary entries by dictionarySource(). -->
<div id="result"></div>
</body>
</html>
/**
 * Create a `source` callback for jQuery UI autocomplete that queries a
 * groonga suggest endpoint via JSONP.
 *
 * Besides feeding up to three keys to the autocomplete widget, every
 * successful response with at least one hit replaces the content of
 * `#result` with a definition list of all returned entries.
 *
 * @param {string} url URL of the suggest endpoint.
 * @returns {function(Object, function(Array))} callback taking the
 *   autocomplete request (`request.term` is the typed text) and the
 *   response function that receives the list of suggested keys.
 */
function dictionarySource(url) {
  // Render the entries as <dt>key</dt><dd>desc desc</dd> pairs. Each item is
  // an array in the order of `columns` below. Clicking a key copies it into
  // the search box and submits the search form.
  function displayItems(items) {
    var results = $("<dl />");
    $.each(items, function(i, val) {
      var key = $("<span />")
        .text(val[0])
        .click(function() {
          $(".search").val($(this).text());
          $("#search").submit();
        });
      results.append($("<dt />").append(key));
      results.append($("<dd />")
                     .append($("<span />").text(val[1]))
                     .append($("<span />").text(val[2])));
    });
    $("#result")
      .empty()
      .append(results);
  }

  // Sequence number of the most recent request; responses that belong to an
  // older request are ignored.
  var request_index = 0;
  var columns = "_key,gene95_desc,edict_desc";
  var xhr;

  function source(request, response) {
    function onSuccess(data, status) {
      // `this` is the settings object of the request (see $.ajax below).
      if (this.autocomplete_request != request_index) {
        return;
      }
      var completions = data && data[1] && data[1]["complete"];
      var entries = [];
      if (completions && completions.length > 2) {
        // The first two elements are not entries (presumably the hit count
        // and the column descriptions of the groonga response).
        entries = completions.slice(2);
      }
      // Only the first three keys are offered as completions.
      var items = [];
      for (var i = 0; i < entries.length && items.length < 3; i++) {
        items.push(entries[i][0]);
      }
      // Keep the previously displayed result when nothing matched.
      if (entries.length > 0) {
        displayItems(entries);
      }
      response(items);
    }

    function onError() {
      if (this.autocomplete_request != request_index) {
        return;
      }
      response([]);
    }

    // Only the latest request is of interest; cancel a pending one.
    if (xhr) {
      xhr.abort();
    }
    xhr = $.ajax(url,
                 {
                   data: {
                     query: request.term,
                     types: 'complete',
                     table: 'item_dictionary',
                     column: 'kana',
                     limit: 25,
                     output_columns: columns,
                     frequency_threshold: 1,
                     prefix_search: "yes"
                   },
                   dataType: "jsonp",
                   autocomplete_request: ++request_index,
                   success: onSuccess,
                   error: onError
                 });
  }

  return source;
}
#!/bin/sh
# Initialize a groonga database for the dictionary example by creating the
# "dictionary" suggest dataset in it.
#
# usage: init-db.sh db_path
#
# Exits with status 1 on a usage error or when the dataset cannot be created.

if [ 1 != $# ]; then
  echo "usage: $0 db_path"
  exit 1
fi

if groonga-suggest-create-dataset "$1" dictionary > /dev/null; then
  echo "db initialized."
else
  echo "failed to initialize db." >&2
  exit 1
fi
# Install directory of the JMdict script: the "jmdict" sub-directory of
# $(examples_dictionarydir) (defined outside this fragment).
jmdictdir = $(examples_dictionarydir)/jmdict
# Script that is distributed and installed into $(jmdictdir).
dist_jmdict_SCRIPTS = \
jmdict.rb
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
require 'rexml/document'
require 'rexml/parsers/streamparser'
require 'rexml/parsers/baseparser'
require 'rexml/streamlistener'
#REXML::Document.new(STDIN)
# REXML stream listener that counts the <sense> elements of every <entry>
# and prints "ENT_SEQ:COUNT" for each entry that has more than eight senses.
class MyListener
  include REXML::StreamListener

  # All counters start at zero, so the listener also works for documents
  # that have no XML declaration (xmldecl is never called for those).
  def initialize
    @n = 0         # <sense> elements closed in the current <entry>
    @n_ents = 0    # <entry> elements closed so far
    @ent = nil     # text of the last closed <ent_seq>
    @text = nil    # most recent text node
  end

  # Reset the sense counter at the start of every <entry>.
  def tag_start(name, attrs)
    @n = 0 if name == 'entry'
  end

  # Update the counters when an element is closed and report the entry
  # if it has more than eight senses.
  def tag_end(name)
    case name
    when 'sense'
      @n += 1
    when 'entry'
      @n_ents += 1
      puts "#{@ent}:#{@n}" if @n > 8
    when 'ent_seq'
      # The text seen last is the content of the <ent_seq> element.
      @ent = @text
    end
  end

  # Remember the most recent text node.
  def text(text)
    @text = text
  end

  # Reset the entry counter when the XML declaration is read.
  def xmldecl(version, encoding, standalone)
    @n_ents = 0
  end
end
# Stream-parse the XML document read from standard input and dispatch the
# parser events to a MyListener instance.
REXML::Parsers::StreamParser.new(STDIN, MyListener.new).parse
.. highlightlang:: none
辞書検索ツール
==============
名前
----
groonga辞書検索ツール
説明
----
様々な商用・非商用の辞書ファイルをインポートしてgroongaで検索できるようにします。
対応している辞書
++++++++++++++++
現状では下記の辞書に対応しています。
* EDICT
EDICTは、Monash大学Jim Breen教授が提供している和英辞書です。下記から入手できます。
http://ftp.monash.edu.au/pub/nihongo/edict.gz
* GENE95
GENE95は、Kurumiさん(NiftyID: GGD00145)が作成された英和辞書です。下記から入手できます。
http://www.namazu.org/~tsuchiya/sdic/data/gene95.tar.gz
* 英辞郎
英辞郎は、EDPという団体によって編纂されている英和・和英辞書です。
http://www.eijiro.jp/
書店やオンラインショップなどで購入できます。
データベースの初期化
++++++++++++++++++++
本ディレクトリで下記のように実行し、辞書データを格納するデータベースファイルを初期化します。
./init-db.sh データベースパス名
このようにして作成したデータベースについて、様々な辞書のデータをインポートすることができます。
インポートの方法
++++++++++++++++
* EDICT
edictディレクトリ配下で以下のように実行します。 edict.gzは自動でダウンロードします。
./edict-import.sh データベースパス名
* GENE95
gene95ディレクトリ配下で下記のように実行します。 gene95.tar.gzは自動でダウンロードします。
./gene-import.sh データベースパス名
* 英辞郎
英辞郎に付属のPDICツールを用いてCSVファイル形式に辞書をエクスポートします。(このとき「登録項目」ですべての項目を出力するようにします) eijiroディレクトリ配下で下記のように実行します。
./eijiro-import.sh データベースパス名 出力したCSVファイルのパス名
(英辞郎第四版で動作を確認しています)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment