Commit 3f1e8d8e authored by Arnaud Fontaine

itools: Updated to the latest version (0.77.8), as the bundled code dated from 2009.

Mainly for the sake of maintenance, and to update the code before porting it to Python 3.
parent 9cae3dc6
......@@ -8,26 +8,18 @@ The intention is that even these few remaining functionalities should
be replaced by native Zope API calls.
The original itools distribution on which this stub is based is the
0.50 branch of the itools repository at:
0.77.8 branch of the itools repository at:
https://github.com/hforge/itools/tree/0.50
The original CREDITS.txt file can be seen at:
https://github.com/hforge/itools/blob/0.50/CREDITS
https://github.com/hforge/itools/tree/0.77
The copyright notice of the original code is as follows:
Copyright
---------
Copyright (C) 2002-2008 Juan David Ibáñez Palomar <jdavid@itaapy.com>
Copyright (C) 2005-2008 Luis Arturo Belmar-Letelier <luis@itaapy.com>
Copyright (C) 2005-2008 Hervé Cauwelier <herve@itaapy.com>
Copyright (C) 2005-2008 Nicolas Deram <nicolas@itaapy.com>
And others. Check the CREDITS file for complete list.
Copyright (C) 2004-2012 J. David Ibáñez <jdavid.ibp@gmail.com>
Copyright (C) 2008 David Versmisse <versmisse@lil.univ-littoral.fr>
Copyright (C) 2009 Hervé Cauwelier <herve@oursours.net>
License
-------
......
# -*- coding: UTF-8 -*-
# Copyright (C) 2006-2008 Juan David Ibáñez Palomar <jdavid@itaapy.com>
# Copyright (C) 2008 Henry Obein <henry@itaapy.com>
# Copyright (C) 2004, 2006-2009 J. David Ibáñez <jdavid.ibp@gmail.com>
# Copyright (C) 2008 Henry Obein <henry.obein@gmail.com>
# Copyright (C) 2010 Hervé Cauwelier <herve@oursours.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
......@@ -16,13 +17,13 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Import from itools
from __future__ import absolute_import
from .accept import AcceptLanguageType, get_accept, select_language
from .accept import init_language_selector
from .base import has_language, get_languages, get_language_name
from .fuzzy import get_distance, get_similarity, is_similar, get_most_similar
from .locale_ import format_date, format_time, format_datetime
from .oracle import guess_language, is_asian_character, is_punctuation
from accept import AcceptLanguageType, get_accept, select_language
from accept import init_language_selector
from fuzzy import get_distance, get_similarity, is_similar, get_most_similar
from languages import has_language, get_languages, get_language_name
from locale_ import format_date, format_time, format_datetime
from locale_ import format_number
from oracle import guess_language, is_asian_character, is_punctuation
......@@ -41,6 +42,7 @@ __all__ = [
'format_date',
'format_time',
'format_datetime',
'format_number',
# oracle
'guess_language',
'is_asian_character',
......
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2008 Juan David Ibáñez Palomar <jdavid@itaapy.com>
# -*- coding: UTF-8 -*-
# Copyright (C) 2002-2008, 2010 J. David Ibáñez <jdavid.ibp@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
......
# -*- coding: utf-8 -*-
# Copyright (C) 2002-2003, 2007-2008 Juan David Ibáñez Palomar <jdavid@itaapy.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Import from itools
from ..utils import get_abspath
# Initializes a dictionary containing the ISO 639 language codes/names.
# The table is read from the "languages.txt" file located by get_abspath();
# every useful line has the form "<code> <name>", blank lines and lines
# starting with '#' are ignored.
languages = {}
filename = get_abspath('languages.txt')
# The context manager closes the file as soon as it has been read, instead
# of leaving the handle to the garbage collector.
with open(filename) as language_file:
    for line in language_file:
        line = line.strip()
        if line and line[0] != '#':
            # Split on the first space only, language names contain spaces
            code, name = line.split(' ', 1)
            languages[code] = name

# Builds a sorted list with the languages code and name.
# sorted() returns a list whether keys() is a list (Python 2) or a view
# (Python 3), which "keys()" followed by ".sort()" does not.
language_codes = sorted(languages)
langs = [{'code': x, 'name': languages[x]} for x in language_codes]
def has_language(code):
    """Tell whether 'code' is a key of the 'languages' table.

    Returns True when the code is known, False otherwise.
    """
    try:
        languages[code]
    except KeyError:
        return False
    return True
def get_languages():
    """Return the known languages as a new list of dictionaries.

    Every item is a copy of one entry of 'langs' (keys 'code' and 'name'),
    in the same order, so the caller may modify the result without touching
    the module-level table.
    """
    return [dict(entry) for entry in langs]
def get_language_name(code):
    """Return the name registered for the language 'code'.

    The placeholder u'???' is returned when the code is not a key of the
    'languages' table.
    """
    # FIXME The value returned should be a MSG object, but the MSG class comes
    # from the itools.gettext module, which is higher level than itools.i18n
    return languages.get(code, u'???')
# -*- coding: UTF-8 -*-
# Copyright (C) 2004 Thierry Fromon <from.t@free.fr>
# Copyright (C) 2006-2007 Juan David Ibáñez Palomar <jdavid@itaapy.com>
# Copyright (C) 2004, 2006-2007, 2009 J. David Ibáñez <jdavid.ibp@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
......@@ -17,12 +17,11 @@
def get_distance(a, b):
"""
This function was giving by Magnus Lie Hetland. It calculates the gap
"""This function was giving by Magnus Lie Hetland. It calculates the gap
(mathematical distance) between two strings with the cost of word's
translation inside the string.
"""
# XXX Find URL to original code, check license
# FIXME Find URL to original code, check license
c = {}
n = len(a)
m = len(b)
......@@ -52,8 +51,7 @@ def get_similarity(a, b):
def is_similar(a, b, limit=0.8):
"""
Returns True if both text strings are close enough, False otherwise.
"""Returns True if both text strings are close enough, False otherwise.
The optional parameter 'limit' defines the degree of similarity required
to be considered 'close enough', it is a float value between '0'
(completely different) and '1' (the same string).
......@@ -62,8 +60,8 @@ def is_similar(a, b, limit=0.8):
def get_most_similar(a, *args):
"""
Returns the text string from 'args' that is closest to the given string.
"""Returns the text string from 'args' that is closest to the given
string.
"""
if not args:
return None
......
# -*- coding: UTF-8 -*-
# Copyright (C) 2009 J. David Ibáñez <jdavid.ibp@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# ISO 639-1 (alpha-2) codes, as listed in the ISO 639-2 code list
# http://www.loc.gov/standards/iso639-2/php/code_list.php
# Maps a language code to the English name of the language.  Besides the
# two-letter codes, the table holds a few "<language>-<COUNTRY>" variants.
# Codes and names ending with the letter "u" had lost it ('c', 'e', 'Zul',
# ...); they are restored here so that the table agrees with languages.txt.
languages = {
    'aa': 'Afar',
    'ab': 'Abkhazian',
    'ae': 'Avestan',
    'af': 'Afrikaans',
    'ak': 'Akan',
    'am': 'Amharic',
    'an': 'Aragonese',
    'ar': 'Arabic',
    'as': 'Assamese',
    'av': 'Avaric',
    'ay': 'Aymara',
    'az': 'Azerbaijani',
    'ba': 'Bashkir',
    'be': 'Belarusian',
    'bg': 'Bulgarian',
    'bh': 'Bihari',
    'bi': 'Bislama',
    'bm': 'Bambara',
    'bn': 'Bengali',
    'bo': 'Tibetan',
    'br': 'Breton',
    'bs': 'Bosnian',
    'ca': 'Catalan',
    'ce': 'Chechen',
    'ch': 'Chamorro',
    'co': 'Corsican',
    'cr': 'Cree',
    'cs': 'Czech',
    'cu': 'Church Slavic',
    'cv': 'Chuvash',
    'cy': 'Welsh',
    'da': 'Danish',
    'de': 'German',
    # NOTE(review): 'de-AU' is the code used by languages.txt; the ISO 3166
    # code of Austria is 'AT' -- confirm before changing it
    'de-AU': 'German/Austria',
    'de-DE': 'German/Germany',
    'de-CH': 'German/Switzerland',
    'dv': 'Divehi; Dhivehi; Maldivian',
    'dz': 'Dzongkha',
    'ee': 'Ewe',
    'el': 'Greek',
    'en': 'English',
    'en-GB': 'English/United Kingdom',
    'en-US': 'English/United States',
    'eo': 'Esperanto',
    'es': 'Spanish',
    'es-AR': 'Spanish/Argentina',
    'es-CO': 'Spanish/Colombia',
    'es-MX': 'Spanish/Mexico',
    'es-ES': 'Spanish/Spain',
    'et': 'Estonian',
    'eu': 'Basque',
    'fa': 'Persian',
    'ff': 'Fulah',
    'fi': 'Finnish',
    'fj': 'Fijian',
    'fo': 'Faroese',
    'fr': 'French',
    'fr-BE': 'French/Belgium',
    'fr-CA': 'French/Canada',
    'fr-FR': 'French/France',
    'fr-CH': 'French/Switzerland',
    'fy': 'Frisian',
    'ga': 'Irish',
    'gd': 'Gaelic',
    'gl': 'Galician',
    'gn': 'Guarani',
    'gu': 'Gujarati',
    'gv': 'Manx',
    'ha': 'Hausa',
    'he': 'Hebrew',
    'hi': 'Hindi',
    'ho': 'Hiri Motu',
    'hr': 'Croatian',
    'ht': 'Haitian',
    'hu': 'Hungarian',
    'hy': 'Armenian',
    'hz': 'Herero',
    'ia': 'Interlingua',
    'id': 'Indonesian',
    'ie': 'Interlingue; Occidental',
    'ig': 'Igbo',
    'ii': 'Sichuan Yi; Nuosu',
    'ik': 'Inupiak',
    'io': 'Ido',
    'is': 'Icelandic',
    'it': 'Italian',
    'iu': 'Inuktitut',
    'ja': 'Japanese',
    'jv': 'Javanese',
    'ka': 'Georgian',
    'kg': 'Kongo',
    'ki': 'Kikuyu; Gikuyu',
    'kj': 'Kuanyama; Kwanyama',
    'kk': 'Kazakh',
    'kl': 'Kalaallisut; Greenlandic',
    'km': 'Khmer',
    'kn': 'Kannada',
    'ko': 'Korean',
    'kr': 'Kanuri',
    'ks': 'Kashmiri',
    'ku': 'Kurdish',
    'kv': 'Komi',
    'kw': 'Cornish',
    'ky': 'Kirghiz; Kyrgyz',
    'la': 'Latin',
    'lb': 'Luxembourgish',
    'lg': 'Ganda',
    'li': 'Limburgan',
    'ln': 'Lingala',
    'lo': 'Lao',
    'lt': 'Lithuanian',
    'lu': 'Luba-Katanga',
    'lv': 'Latvian',
    'mg': 'Malagasy',
    'mh': 'Marshallese',
    'mi': 'Maori',
    'mk': 'Macedonian',
    'ml': 'Malayalam',
    'mn': 'Mongolian',
    'mr': 'Marathi',
    'ms': 'Malay',
    'mt': 'Maltese',
    'my': 'Burmese',
    'na': 'Nauru',
    'nb': 'Norwegian Bokmal',
    'nd': 'Ndebele, North',
    'ne': 'Nepali',
    'ng': 'Ndonga',
    'nl': 'Dutch',
    'nl-BE': 'Dutch/Belgium',
    'nn': 'Norwegian Nyrnosk',
    'no': 'Norwegian',
    'nr': 'Ndebele, South',
    'nv': 'Navajo; Navaho',
    'ny': 'Chichewa; Chewa; Nyanja',
    'oc': 'Occitan',
    'oj': 'Ojibwa',
    'om': 'Oromo',
    'or': 'Oriya',
    'os': 'Ossetian; Ossetic',
    'pa': 'Panjabi; Punjabi',
    'pi': 'Pali',
    'pl': 'Polish',
    'ps': 'Pushto; Pashto',
    'pt': 'Portuguese',
    'pt-BR': 'Portuguese/Brazil',
    'qu': 'Quechua',
    'rm': 'Romansh',
    'rn': 'Rundi',
    'ro': 'Romanian; Moldavian',
    'ru': 'Russian',
    'rw': 'Kinyarwanda',
    'sa': 'Sanskrit',
    'sc': 'Sardinian',
    'sd': 'Sindhi',
    'se': 'Sami',
    'sg': 'Sango',
    'si': 'Sinhala; Sinhalese',
    'sk': 'Slovak',
    'sl': 'Slovenian',
    'sm': 'Samoan',
    'sn': 'Shona',
    'so': 'Somali',
    'sq': 'Albanian',
    'sr': 'Serbian',
    'ss': 'Swati',
    'st': 'Sotho',
    'su': 'Sundanese',
    'sv': 'Swedish',
    'sw': 'Swahili',
    'ta': 'Tamil',
    'te': 'Telugu',
    'tg': 'Tajik',
    'th': 'Thai',
    'ti': 'Tigrinya',
    'tk': 'Turkmen',
    'tl': 'Tagalog',
    'tn': 'Tswana',
    'to': 'Tonga',
    'tr': 'Turkish',
    'ts': 'Tsonga',
    'tt': 'Tatar',
    'tw': 'Twi',
    'ty': 'Tahitian',
    'ug': 'Uighur; Uyghur',
    'uk': 'Ukrainian',
    'ur': 'Urdu',
    'uz': 'Uzbek',
    've': 'Venda',
    'vi': 'Vietnamese',
    'vo': 'Volapuk',
    'wa': 'Walloon',
    'wo': 'Wolof',
    'xh': 'Xhosa',
    'yi': 'Yiddish',
    'yo': 'Yoruba',
    'za': 'Zhuang; Chuang',
    'zh': 'Chinese',
    'zh-CN': 'Chinese/China',
    'zh-TW': 'Chinese/Taiwan',
    'zu': 'Zulu',
}

# One {'code': ..., 'name': ...} dictionary per language, sorted by code
langs = [{'code': x, 'name': languages[x]} for x in sorted(languages)]
###########################################################################
# API
###########################################################################
def has_language(code):
    """Return True if 'code' is a key of the 'languages' table, False
    otherwise.
    """
    is_known = code in languages
    return is_known
def get_languages():
    """Returns a new list with one dictionary per language.

    Each dictionary is a copy of an entry of 'langs' and holds the keys
    'code' and 'name'; the order of 'langs' is kept.
    """
    copies = []
    for entry in langs:
        copies.append(entry.copy())
    return copies
def get_language_name(code):
    """Returns the name of the language identified by 'code', or the
    placeholder '???' when the code is not in the 'languages' table.
    """
    # FIXME The value returned should be a MSG object, but the MSG class comes
    # from the itools.gettext module, which is higher level than itools.i18n
    try:
        return languages[code]
    except KeyError:
        return '???'
# ISO 639-1 (alpha-2) codes, as listed in the ISO 639-2 code list
# http://www.loc.gov/standards/iso639-2/php/code_list.php
#
# Format: "<alpha-2 code> <language name>"
aa Afar
ab Abkhazian
ae Avestan
af Afrikaans
ak Akan
am Amharic
an Aragonese
ar Arabic
as Assamese
av Avaric
ay Aymara
az Azerbaijani
ba Bashkir
be Belarusian
bg Bulgarian
bh Bihari
bi Bislama
bm Bambara
bn Bengali
bo Tibetan
br Breton
bs Bosnian
ca Catalan
ce Chechen
ch Chamorro
co Corsican
cr Cree
cs Czech
cu Church Slavic
cv Chuvash
cy Welsh
da Danish
de German
de-AU German/Austria
de-DE German/Germany
de-CH German/Switzerland
dv Divehi; Dhivehi; Maldivian
dz Dzongkha
ee Ewe
el Greek
en English
en-GB English/United Kingdom
en-US English/United States
eo Esperanto
es Spanish
es-AR Spanish/Argentina
es-CO Spanish/Colombia
es-MX Spanish/Mexico
es-ES Spanish/Spain
et Estonian
eu Basque
fa Persian
ff Fulah
fi Finnish
fj Fijian
fo Faroese
fr French
fr-BE French/Belgium
fr-CA French/Canada
fr-FR French/France
fr-CH French/Switzerland
fy Frisian
ga Irish
gd Gaelic
gl Galician
gn Guarani
gu Gujarati
gv Manx
ha Hausa
he Hebrew
hi Hindi
ho Hiri Motu
hr Croatian
ht Haitian
hu Hungarian
hy Armenian
hz Herero
ia Interlingua
id Indonesian
ie Interlingue; Occidental
ig Igbo
ii Sichuan Yi; Nuosu
ik Inupiak
io Ido
is Icelandic
it Italian
iu Inuktitut
ja Japanese
jv Javanese
ka Georgian
kg Kongo
ki Kikuyu; Gikuyu
kj Kuanyama; Kwanyama
kk Kazakh
kl Kalaallisut; Greenlandic
km Khmer
kn Kannada
ko Korean
kr Kanuri
ks Kashmiri
ku Kurdish
kv Komi
kw Cornish
ky Kirghiz; Kyrgyz
la Latin
lb Luxembourgish
lg Ganda
li Limburgan
ln Lingala
lo Lao
lt Lithuanian
lu Luba-Katanga
lv Latvian
mg Malagasy
mh Marshallese
mi Maori
mk Macedonian
ml Malayalam
mn Mongolian
mr Marathi
ms Malay
mt Maltese
my Burmese
na Nauru
nb Norwegian Bokmal
nd Ndebele, North
ne Nepali
ng Ndonga
nl Dutch
nl-BE Dutch/Belgium
nn Norwegian Nyrnosk
no Norwegian
nr Ndebele, South
nv Navajo; Navaho
ny Chichewa; Chewa; Nyanja
oc Occitan
oj Ojibwa
om Oromo
os Ossetian; Ossetic
or Oriya
pa Panjabi; Punjabi
pi Pali
pl Polish
ps Pushto; Pashto
pt Portuguese
pt-BR Portuguese/Brazil
qu Quechua
rm Romansh
rn Rundi
ro Romanian; Moldavian
ru Russian
rw Kinyarwanda
sa Sanskrit
sc Sardinian
sd Sindhi
se Sami
sg Sango
si Sinhala; Sinhalese
sk Slovak
sl Slovenian
sm Samoan
sn Shona
so Somali
sq Albanian
sr Serbian
ss Swati
st Sotho
su Sundanese
sv Swedish
sw Swahili
ta Tamil
te Telugu
tg Tajik
th Thai
ti Tigrinya
tk Turkmen
tl Tagalog
tn Tswana
to Tonga
tr Turkish
ts Tsonga
tt Tatar
tw Twi
ty Tahitian
ug Uighur; Uyghur
uk Ukrainian
ur Urdu
uz Uzbek
ve Venda
vi Vietnamese
vo Volapuk
wa Walloon
wo Wolof
xh Xhosa
yi Yiddish
yo Yoruba
za Zhuang; Chuang
zh Chinese
zh-CN Chinese/China
zh-TW Chinese/Taiwan
zu Zulu
# Add here the languages you need
# -*- coding: UTF-8 -*-
# Copyright (C) 2007 Juan David Ibáñez Palomar <jdavid@itaapy.com>
# Copyright (C) 2007, 2009 J. David Ibáñez <jdavid.ibp@gmail.com>
# Copyright (C) 2010 Hervé Cauwelier <herve@oursours.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
......@@ -17,50 +18,140 @@
"""
Output dates and times in locale format.
"""
from __future__ import absolute_import
# Import from itools
from .accept import get_accept
# strftime patterns for each supported language code, in the order:
# Date, Time, DateTime
formats = dict(
    en=('%d/%m/%Y', '%H:%M', '%d/%m/%Y %H:%M'),
    es=('%d/%m/%Y', '%H.%M', '%d/%m/%Y %H.%M'),
    fr=('%d/%m/%Y', '%Hh%M', '%d/%m/%Y %Hh%M'),
)

# The language codes for which patterns are defined
available_languages = formats.keys()
# Import from the Standard Library
from decimal import Decimal
# Import from itools
from accept import get_accept
def get_format(source, accept):
    """Return the entry of 'source' for the negotiated language.

    source: dictionary keyed by language code (for instance 'date_formats'
            or 'number_formats')
    accept: object providing 'select_language(available_languages)'; when
            None, the value returned by 'get_accept()' is used instead

    The entry for 'en' is returned when the negotiation selects no language,
    so a KeyError is raised if 'source' has no 'en' key in that case.
    """
    # By default use the computer's locale
    if accept is None:
        accept = get_accept()

    # Negotiate.  The codes are passed as a list, which is what keys()
    # returned on Python 2, rather than as a dictionary view.
    available_languages = list(source)
    language = accept.select_language(available_languages)
    if language is None:
        language = 'en'

    # The format
    return source[language]
#
# Date and Time
#
def format_date(x, accept=None):
    """Return 'x' formatted as a date for the negotiated language.

    x:      object providing 'strftime' (a date or datetime, for instance)
    accept: passed to 'get_format' to negotiate the language
    """
    # The first item of a 'date_formats' entry is the date pattern
    pattern = get_format(date_formats, accept)[0]
    return x.strftime(pattern)
def format_time(x, accept=None):
    """Return 'x' formatted as a time for the negotiated language.

    x:      object providing 'strftime' (a time or datetime, for instance)
    accept: passed to 'get_format' to negotiate the language
    """
    # The second item of a 'date_formats' entry is the time pattern
    pattern = get_format(date_formats, accept)[1]
    return x.strftime(pattern)
def format_datetime(x, accept=None):
    """Return 'x' formatted as a date and time for the negotiated language.

    x:      object providing 'strftime' (a datetime, for instance)
    accept: passed to 'get_format' to negotiate the language
    """
    # The third item of a 'date_formats' entry is the date and time pattern
    pattern = get_format(date_formats, accept)[2]
    return x.strftime(pattern)
#
# Decimal
#
# http://docs.python.org/library/decimal.html#recipes
# Modified for unicode and trailing currency
def moneyfmt(value, places=2, curr=u'', sep=u',', dp=u'.', pos=u'',
             neg=u'-', trailneg=u''):
    """Convert Decimal to a money formatted unicode.

    places:  required number of places after the decimal point
    curr:    optional currency symbol (may be blank); it is written at the
             very end of the result, after the trailing minus indicator
    sep:     optional grouping separator (comma, period, space, or blank)
    dp:      decimal point indicator (comma or period)
             only specify as blank when places is zero
    pos:     optional sign for positive numbers: '+', space or blank
    neg:     optional sign for negative numbers: '-', '(', space or blank
    trailneg:optional trailing minus indicator: '-', ')', space or blank

    >>> d = Decimal('-1234567.8901')
    >>> moneyfmt(d, curr='$')
    '-1,234,567.89$'
    >>> moneyfmt(d, places=0, sep='.', dp='', neg='', trailneg='-')
    '1.234.568-'
    >>> moneyfmt(d, curr='$', neg='(', trailneg=')')
    '(1,234,567.89)$'
    >>> moneyfmt(Decimal(123456789), sep=' ')
    '123 456 789.00'
    >>> moneyfmt(Decimal('-0.02'), neg='<', trailneg='>')
    '<0.02>'
    """
    q = Decimal(10) ** -places      # 2 places --> '0.01'
    sign, digits, exp = value.quantize(q).as_tuple()
    # A real list of text digits: "map(unicode, digits)" only works on
    # Python 2, where "unicode" exists and map() returns a list.
    digits = [u'%d' % digit for digit in digits]
    result = []
    build, next_digit = result.append, digits.pop

    # The pieces are collected from right to left and reversed at the end
    if curr:
        build(curr)
    if sign:
        build(trailneg)
    # Fractional part, padded with zeros when there are not enough digits
    for _ in range(places):
        build(next_digit() if digits else u'0')
    build(dp)
    # Integer part, at least one digit, grouped by three
    if not digits:
        build(u'0')
    group = 0
    while digits:
        build(next_digit())
        group += 1
        if group == 3 and digits:
            group = 0
            build(sep)
    build(neg if sign else pos)
    return u''.join(reversed(result))
def format_number(x, places=2, curr='', pos=u'', neg=u'-', trailneg=u"",
                  accept=None):
    """Format a number with the separators of the negotiated language.

    x:       the number; anything that is not exactly a Decimal is first
             converted with Decimal(x)
    places:  required number of places after the decimal point
    curr:    optional currency symbol (may be blank)
    pos:     optional sign for positive numbers: '+', space or blank
    neg:     optional sign for negative numbers: '-', '(', space or blank
    trailneg:optional trailing minus indicator: '-', ')', space or blank
    accept:  passed to 'get_format' to negotiate the language

    The entry of 'number_formats' selected by 'get_format' is handed to
    'moneyfmt' as keyword arguments.
    """
    number = x if type(x) is Decimal else Decimal(x)
    separators = get_format(number_formats, accept)
    return moneyfmt(number, places=places, curr=curr, pos=pos, neg=neg,
                    trailneg=trailneg, **separators)
###########################################################################
# Initialize the module
###########################################################################
# strftime patterns for each supported language code, in the order:
# Date, Time, DateTime
date_formats = dict(
    en=('%d/%m/%Y', '%H:%M', '%d/%m/%Y %H:%M'),
    es=('%d/%m/%Y', '%H.%M', '%d/%m/%Y %H.%M'),
    fr=('%d/%m/%Y', '%Hh%M', '%d/%m/%Y %Hh%M'),
)
# Grouping separator ('sep') and decimal point ('dp') for each supported
# language code.  See "moneyfmt" docstring for help
number_formats = dict(
    en=dict(sep=u',', dp=u'.'),
    es=dict(sep=u'.', dp=u','),
    fr=dict(sep=u' ', dp=u','),
)
# -*- coding: UTF-8 -*-
# Copyright (C) 2004 Thierry Fromon <from.t@free.fr>
# Copyright (C) 2004, 2006-2007 Juan David Ibáñez Palomar <jdavid@itaapy.com>
# Copyright (C) 2008 Henry Obein <henry@itaapy.com>
# Copyright (C) 2004, 2006-2007, 2009 J. David Ibáñez <jdavid.ibp@gmail.com>
# Copyright (C) 2008 Henry Obein <henry.obein@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
......@@ -16,143 +16,6 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
###########################################################################
# To add a new language, edit the dictionaries below:
#
# - positive_chars
#
# Defines special characters (like accentuated characters) that belong
# to the language.
#
# - negative_chars
#
# Defines special characters (like accentuated characters) that do not
# belong to the language.
#
# - positive_words
#
# Defines common words that belong to the language.
#
# - negative_words
#
# Defines some words that do not belong to the language.
###########################################################################
import unicodedata
# Special characters that belong to a language: each key is a single
# character, each value the list of language codes it belongs to.
# NOTE(review): presumably consulted by guess_language -- confirm
positive_chars = {
u'¡': ['es'],
u'¿': ['es'],
u'ä': ['de'],
u'ß': ['de'],
u'ç': ['fr'],
u'ê': ['fr'],
u'í': ['es'],
u'ñ': ['es'],
u'ö': ['de'],
u'ó': ['es'],
u'ü': ['de'],
u'ú': ['es'],
# Asian languages
# Japanese : based on particles (hiragana)
u'の': ['ja'],
u'は': ['ja'],
u'で': ['ja'],
u'に': ['ja'],
u'が': ['ja'],
u'へ': ['ja'],
u'を': ['ja'],
u'や': ['ja'],
u'と': ['ja'],
# Japanese : punctuation
u'、': ['ja'],
u'。': ['ja'],
}
# Special characters that do not belong to a language; none is defined
negative_chars = {}
# Common words that belong to a language: each key is a lowercase word,
# each value the list of language codes the word belongs to.
# NOTE(review): presumably consulted by guess_language -- confirm
positive_words = {
u'à': ['fr'],
u'al': ['es'],
u'an': ['en'],
u'and': ['en'],
u'are': ['en'],
u'as': ['en'],
u'aux': ['fr'],
u'but': ['en'],
u'como': ['es'],
u'con': ['es'],
u'de': ['es', 'fr'],
u'del': ['es'],
u'des': ['fr'],
u'donc': ['fr'],
u'du': ['fr'],
u'el': ['es'],
u'elle': ['fr'],
u'elles': ['fr'],
u'es': ['es'],
u'est': ['fr'],
u'está': ['es'],
u'et': ['fr'],
u'from': ['en'],
u'hay': ['es'],
u'he': ['en', 'es'],
u'i': ['en'],
u'il': ['fr'],
u'ils': ['fr'],
u'in': ['en'],
u'is': ['en'],
u'it': ['en'],
u'je': ['fr'],
u'las': ['es'],
u'le': ['es', 'fr'],
u'lo': ['es'],
u'les': ['es', 'fr'],
u'los': ['es'],
u'mais': ['fr'],
u'no': ['en', 'es'],
u'nous': ['fr'],
u'nueva': ['es'],
u'o': ['es'],
u'of': ['en'],
u'on': ['en'],
u'or': ['en'],
u'où': ['fr'],
u'para': ['es'],
u'pero': ['es'],
u'por': ['es'],
u'que': ['es', 'fr'],
u'qué': ['es'],
u'she': ['en'],
u'su': ['es'],
u'sur': ['fr'],
u'that': ['en'],
u'the': ['en'],
u'their': ['en'],
u'this': ['en'],
u'to': ['en'],
u'tu': ['es', 'fr'],
u'un': ['es', 'fr'],
u'una': ['es'],
u'une': ['fr'],
u'vous': ['fr'],
u'when': ['en'],
u'where': ['en'],
u'y': ['es'],
u'you': ['en'],
u'your': ['en'],
}
# Words that do not belong to a language, with the same layout as above
negative_words = {
u'du': ['es'],
}
# One thousand words should be enough
# NOTE(review): looks like an upper bound on the words examined when
# guessing a language -- confirm against guess_language
MAX_WORDS = 1000
def is_asian_character(c):
......@@ -340,3 +203,138 @@ def guess_language(text):
return None
###########################################################################
# Initialize the module
#
# To add a new language, edit the dictionaries below:
#
# - positive_chars
# Defines special characters (like accentuated characters) that belong to
# the language.
#
# - negative_chars
# Defines special characters (like accentuated characters) that do not
# belong to the language.
#
# - positive_words
# Defines common words that belong to the language.
#
# - negative_words
# Defines some words that do not belong to the language.
#
###########################################################################
# Each key is a single character, each value the list of codes of the
# languages that character belongs to.
positive_chars = {
u'¡': ['es'],
u'¿': ['es'],
u'ä': ['de'],
u'ß': ['de'],
u'ç': ['fr'],
u'ê': ['fr'],
u'í': ['es'],
u'ñ': ['es'],
u'ö': ['de'],
u'ó': ['es'],
u'ü': ['de'],
u'ú': ['es'],
# Asian languages
# Japanese : based on particles (hiragana)
u'の': ['ja'],
u'は': ['ja'],
u'で': ['ja'],
u'に': ['ja'],
u'が': ['ja'],
u'へ': ['ja'],
u'を': ['ja'],
u'や': ['ja'],
u'と': ['ja'],
# Japanese : punctuation
u'、': ['ja'],
u'。': ['ja'],
}
# Same layout for the characters that do not belong to a language; no such
# character is defined at the moment
negative_chars = {}
# Each key is a lowercase word, each value the list of codes of the
# languages that word belongs to.
positive_words = {
u'à': ['fr'],
u'al': ['es'],
u'an': ['en'],
u'and': ['en'],
u'are': ['en'],
u'as': ['en'],
u'aux': ['fr'],
u'but': ['en'],
u'como': ['es'],
u'con': ['es'],
u'de': ['es', 'fr'],
u'del': ['es'],
u'des': ['fr'],
u'donc': ['fr'],
u'du': ['fr'],
u'el': ['es'],
u'elle': ['fr'],
u'elles': ['fr'],
u'es': ['es'],
u'est': ['fr'],
u'está': ['es'],
u'et': ['fr'],
u'from': ['en'],
u'hay': ['es'],
u'he': ['en', 'es'],
u'i': ['en'],
u'il': ['fr'],
u'ils': ['fr'],
u'in': ['en'],
u'is': ['en'],
u'it': ['en'],
u'je': ['fr'],
u'las': ['es'],
u'le': ['es', 'fr'],
u'lo': ['es'],
u'les': ['es', 'fr'],
u'los': ['es'],
u'mais': ['fr'],
u'no': ['en', 'es'],
u'nous': ['fr'],
u'nueva': ['es'],
u'o': ['es'],
u'of': ['en'],
u'on': ['en'],
u'or': ['en'],
u'où': ['fr'],
u'para': ['es'],
u'pero': ['es'],
u'por': ['es'],
u'que': ['es', 'fr'],
u'qué': ['es'],
u'she': ['en'],
u'su': ['es'],
u'sur': ['fr'],
u'that': ['en'],
u'the': ['en'],
u'their': ['en'],
u'this': ['en'],
u'to': ['en'],
u'tu': ['es', 'fr'],
u'un': ['es', 'fr'],
u'una': ['es'],
u'une': ['fr'],
u'vous': ['fr'],
u'when': ['en'],
u'where': ['en'],
u'y': ['es'],
u'you': ['en'],
u'your': ['en'],
}
# Same layout for the words that do not belong to a language
negative_words = {
u'du': ['es'],
}
# One thousand words should be enough
# NOTE(review): presumably caps the number of words guess_language looks
# at -- confirm
MAX_WORDS = 1000
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2008 Juan David Ibáñez Palomar <jdavid@itaapy.com>
# Copyright (C) 2008 Gautier Hayoun <gautier.hayoun@itaapy.com>
# -*- coding: UTF-8 -*-
# Copyright (C) 2008-2010 J. David Ibáñez <jdavid.ibp@gmail.com>
# Copyright (C) 2009 David Versmisse <versmisse@lil.univ-littoral.fr>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
......@@ -16,19 +16,12 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Import from the Standard Library
from distutils import core
from distutils.errors import DistutilsOptionError
from distutils.command.build_py import build_py
from distutils.command.register import register
from distutils.command.upload import upload
from getpass import getpass
from mimetypes import MimeTypes
from os import getcwd, open as os_open, devnull, dup2, O_RDWR
from os.path import exists, join as join_path, sep, splitdrive
from re import search
from sys import _getframe, platform, exit, stdin, stdout, stderr
from urllib2 import HTTPPasswordMgr
import sys
from os import getcwd
from os.path import exists, join, sep, splitdrive
from subprocess import Popen, PIPE
from sys import _getframe, modules, getsizeof
from gc import get_referents
def get_abspath(local_path, mname=None):
"""Returns the absolute path to the required file.
......@@ -39,19 +32,73 @@ def get_abspath(local_path, mname=None):
if mname == '__main__' or mname == '__init__':
mpath = getcwd()
else:
module = sys.modules[mname]
module = modules[mname]
if hasattr(module, '__path__'):
mpath = module.__path__[0]
elif '.' in mname:
mpath = sys.modules[mname[:mname.rfind('.')]].__path__[0]
mpath = modules[mname[:mname.rfind('.')]].__path__[0]
else:
mpath = mname
drive, mpath = splitdrive(mpath)
mpath = drive + join_path(mpath, local_path)
mpath = drive + join(mpath, local_path)
# Make it working with Windows. Internally we use always the "/".
if sep == '\\':
mpath = mpath.replace(sep, '/')
return mpath
def get_version(mname=None):
    """Return the content of the 'version.txt' file of a module.

    mname: name of the module; when None, the '__name__' found in the
           globals of the calling frame is used

    The path is resolved with 'get_abspath'.  The text of the file is
    returned with leading and trailing whitespace removed, or None when the
    file does not exist.
    """
    if mname is None:
        mname = _getframe(1).f_globals.get('__name__')

    path = get_abspath('version.txt', mname=mname)
    if exists(path):
        # Close the file right away rather than waiting for the garbage
        # collector to release the handle
        with open(path) as version_file:
            return version_file.read().strip()
    return None
def merge_dicts(d, *args, **kw):
    """Build a new dictionary from 'd', the dictionaries in 'args' and the
    keyword arguments.

    The result starts as a copy of 'd' and is then updated with every item
    of 'args' in order, and finally with 'kw', so later values win.  None of
    the arguments is modified.
    """
    merged = d.copy()
    for extra in args + (kw,):
        merged.update(extra)
    return merged
def get_sizeof(obj):
    """Return the cumulated size of an object and of the objects reachable
    from it.

    Sizes are measured with 'sys.getsizeof' and the objects are discovered
    with 'gc.get_referents'.  An object is added to the total only once,
    and the class of every visited object is marked as seen so that it is
    not counted afterwards.
    """
    total = 0
    seen = set()
    pending = {id(obj): obj}
    while pending:
        current_id, current = pending.popitem()
        total += getsizeof(current)
        # Mark the object, and its class: do not count the class
        seen.update((current_id, id(current.__class__)))
        for referent in get_referents(current):
            referent_id = id(referent)
            if referent_id not in seen:
                pending[referent_id] = referent
    return total
def get_pipe(command, cwd=None):
    """Wrapper around 'subprocess.Popen'

    Runs 'command' (handed as is to Popen) in the directory 'cwd', waits for
    it to finish and returns what it wrote to its standard output.

    Raises EnvironmentError, built from the return code and the content of
    the standard error, when the command exits with a non-zero code.
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, cwd=cwd)
    stdoutdata, stderrdata = popen.communicate()
    if popen.returncode != 0:
        # Call syntax: "raise EnvironmentError, (...)" builds the same
        # exception but is only valid on Python 2
        raise EnvironmentError(popen.returncode, stderrdata)
    return stdoutdata
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment