Commit 759448f4 authored by Jan Engelhardt, committed by Linus Torvalds

Kernel utf-8 handling

This patch fixes dead keys and copy/paste of non-ASCII characters in UTF-8
mode on Linux console.  See more details about the original patch at:
http://chris.heathens.co.nz/linux/utf8.html

Already posted on
	(Oldest) http://lkml.org/lkml/2003/5/31/148
	         http://lkml.org/lkml/2005/12/24/69
	(Recent) http://lkml.org/lkml/2006/8/7/75

[bunk@stusta.de: make drivers/char/selection.c:store_utf8() static]
Signed-off-by: Jan Engelhardt <jengelh@gmx.de>
Cc: Alexander E. Patrakov <patrakov@ums.usu.ru>
Cc: Dmitry Torokhov <dtor@mail.ru>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent aa0ac365
...@@ -177,6 +177,7 @@ struct uni_pagedir { ...@@ -177,6 +177,7 @@ struct uni_pagedir {
unsigned long refcount; unsigned long refcount;
unsigned long sum; unsigned long sum;
unsigned char *inverse_translations[4]; unsigned char *inverse_translations[4];
u16 *inverse_trans_unicode;
int readonly; int readonly;
}; };
...@@ -207,6 +208,41 @@ static void set_inverse_transl(struct vc_data *conp, struct uni_pagedir *p, int ...@@ -207,6 +208,41 @@ static void set_inverse_transl(struct vc_data *conp, struct uni_pagedir *p, int
} }
} }
static void set_inverse_trans_unicode(struct vc_data *conp,
struct uni_pagedir *p)
{
int i, j, k, glyph;
u16 **p1, *p2;
u16 *q;
if (!p) return;
q = p->inverse_trans_unicode;
if (!q) {
q = p->inverse_trans_unicode =
kmalloc(MAX_GLYPH * sizeof(u16), GFP_KERNEL);
if (!q)
return;
}
memset(q, 0, MAX_GLYPH * sizeof(u16));
for (i = 0; i < 32; i++) {
p1 = p->uni_pgdir[i];
if (!p1)
continue;
for (j = 0; j < 32; j++) {
p2 = p1[j];
if (!p2)
continue;
for (k = 0; k < 64; k++) {
glyph = p2[k];
if (glyph >= 0 && glyph < MAX_GLYPH
&& q[glyph] < 32)
q[glyph] = (i << 11) + (j << 6) + k;
}
}
}
}
unsigned short *set_translate(int m, struct vc_data *vc) unsigned short *set_translate(int m, struct vc_data *vc)
{ {
inv_translate[vc->vc_num] = m; inv_translate[vc->vc_num] = m;
...@@ -217,19 +253,29 @@ unsigned short *set_translate(int m, struct vc_data *vc) ...@@ -217,19 +253,29 @@ unsigned short *set_translate(int m, struct vc_data *vc)
* Inverse translation is impossible for several reasons: * Inverse translation is impossible for several reasons:
* 1. The font<->character maps are not 1-1. * 1. The font<->character maps are not 1-1.
* 2. The text may have been written while a different translation map * 2. The text may have been written while a different translation map
* was active, or using Unicode. * was active.
* Still, it is now possible to a certain extent to cut and paste non-ASCII. * Still, it is now possible to a certain extent to cut and paste non-ASCII.
*/ */
unsigned char inverse_translate(struct vc_data *conp, int glyph) u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode)
{ {
struct uni_pagedir *p; struct uni_pagedir *p;
int m;
if (glyph < 0 || glyph >= MAX_GLYPH) if (glyph < 0 || glyph >= MAX_GLYPH)
return 0; return 0;
else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc) || else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc))
!p->inverse_translations[inv_translate[conp->vc_num]])
return glyph; return glyph;
else else if (use_unicode) {
return p->inverse_translations[inv_translate[conp->vc_num]][glyph]; if (!p->inverse_trans_unicode)
return glyph;
else
return p->inverse_trans_unicode[glyph];
} else {
m = inv_translate[conp->vc_num];
if (!p->inverse_translations[m])
return glyph;
else
return p->inverse_translations[m][glyph];
}
} }
static void update_user_maps(void) static void update_user_maps(void)
...@@ -243,6 +289,7 @@ static void update_user_maps(void) ...@@ -243,6 +289,7 @@ static void update_user_maps(void)
p = (struct uni_pagedir *)*vc_cons[i].d->vc_uni_pagedir_loc; p = (struct uni_pagedir *)*vc_cons[i].d->vc_uni_pagedir_loc;
if (p && p != q) { if (p && p != q) {
set_inverse_transl(vc_cons[i].d, p, USER_MAP); set_inverse_transl(vc_cons[i].d, p, USER_MAP);
set_inverse_trans_unicode(vc_cons[i].d, p);
q = p; q = p;
} }
} }
...@@ -353,6 +400,10 @@ static void con_release_unimap(struct uni_pagedir *p) ...@@ -353,6 +400,10 @@ static void con_release_unimap(struct uni_pagedir *p)
kfree(p->inverse_translations[i]); kfree(p->inverse_translations[i]);
p->inverse_translations[i] = NULL; p->inverse_translations[i] = NULL;
} }
if (p->inverse_trans_unicode) {
kfree(p->inverse_trans_unicode);
p->inverse_trans_unicode = NULL;
}
} }
void con_free_unimap(struct vc_data *vc) void con_free_unimap(struct vc_data *vc)
...@@ -511,6 +562,7 @@ int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list) ...@@ -511,6 +562,7 @@ int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list)
for (i = 0; i <= 3; i++) for (i = 0; i <= 3; i++)
set_inverse_transl(vc, p, i); /* Update all inverse translations */ set_inverse_transl(vc, p, i); /* Update all inverse translations */
set_inverse_trans_unicode(vc, p);
return err; return err;
} }
...@@ -561,6 +613,7 @@ int con_set_default_unimap(struct vc_data *vc) ...@@ -561,6 +613,7 @@ int con_set_default_unimap(struct vc_data *vc)
for (i = 0; i <= 3; i++) for (i = 0; i <= 3; i++)
set_inverse_transl(vc, p, i); /* Update all inverse translations */ set_inverse_transl(vc, p, i); /* Update all inverse translations */
set_inverse_trans_unicode(vc, p);
dflt = p; dflt = p;
return err; return err;
} }
...@@ -617,6 +670,19 @@ void con_protect_unimap(struct vc_data *vc, int rdonly) ...@@ -617,6 +670,19 @@ void con_protect_unimap(struct vc_data *vc, int rdonly)
p->readonly = rdonly; p->readonly = rdonly;
} }
/*
 * Map an 8-bit character to the value stored for it in the USER_MAP
 * translation table.
 *
 * USER_MAP is always used: this function serves the keyboard, which
 * shouldn't be affected by G0/G1 switching, etc.  When the table entry
 * is still the default direct-to-font value (0xf000 | c), the user is
 * assumed to be using Latin1 and c itself is returned.
 *
 * May be called during an interrupt.
 */
u32 conv_8bit_to_uni(unsigned char c)
{
	unsigned short mapped = translations[USER_MAP][c];

	if (mapped == (0xf000 | c))
		return c;

	return mapped;
}
int int
conv_uni_to_pc(struct vc_data *conp, long ucs) conv_uni_to_pc(struct vc_data *conp, long ucs)
{ {
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
* 21-08-02: Converted to input API, major cleanup. (Vojtech Pavlik) * 21-08-02: Converted to input API, major cleanup. (Vojtech Pavlik)
*/ */
#include <linux/consolemap.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/tty.h> #include <linux/tty.h>
...@@ -308,10 +309,9 @@ static void applkey(struct vc_data *vc, int key, char mode) ...@@ -308,10 +309,9 @@ static void applkey(struct vc_data *vc, int key, char mode)
* Many other routines do put_queue, but I think either * Many other routines do put_queue, but I think either
* they produce ASCII, or they produce some user-assigned * they produce ASCII, or they produce some user-assigned
* string, and in both cases we might assume that it is * string, and in both cases we might assume that it is
* in utf-8 already. UTF-8 is defined for words of up to 31 bits, * in utf-8 already.
* but we need only 16 bits here
*/ */
static void to_utf8(struct vc_data *vc, ushort c) static void to_utf8(struct vc_data *vc, uint c)
{ {
if (c < 0x80) if (c < 0x80)
/* 0******* */ /* 0******* */
...@@ -320,11 +320,21 @@ static void to_utf8(struct vc_data *vc, ushort c) ...@@ -320,11 +320,21 @@ static void to_utf8(struct vc_data *vc, ushort c)
/* 110***** 10****** */ /* 110***** 10****** */
put_queue(vc, 0xc0 | (c >> 6)); put_queue(vc, 0xc0 | (c >> 6));
put_queue(vc, 0x80 | (c & 0x3f)); put_queue(vc, 0x80 | (c & 0x3f));
} else { } else if (c < 0x10000) {
if (c >= 0xD800 && c < 0xE000)
return;
if (c == 0xFFFF)
return;
/* 1110**** 10****** 10****** */ /* 1110**** 10****** 10****** */
put_queue(vc, 0xe0 | (c >> 12)); put_queue(vc, 0xe0 | (c >> 12));
put_queue(vc, 0x80 | ((c >> 6) & 0x3f)); put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
put_queue(vc, 0x80 | (c & 0x3f)); put_queue(vc, 0x80 | (c & 0x3f));
} else if (c < 0x110000) {
/* 11110*** 10****** 10****** 10****** */
put_queue(vc, 0xf0 | (c >> 18));
put_queue(vc, 0x80 | ((c >> 12) & 0x3f));
put_queue(vc, 0x80 | ((c >> 6) & 0x3f));
put_queue(vc, 0x80 | (c & 0x3f));
} }
} }
...@@ -393,7 +403,7 @@ static unsigned int handle_diacr(struct vc_data *vc, unsigned int ch) ...@@ -393,7 +403,7 @@ static unsigned int handle_diacr(struct vc_data *vc, unsigned int ch)
return d; return d;
if (kbd->kbdmode == VC_UNICODE) if (kbd->kbdmode == VC_UNICODE)
to_utf8(vc, d); to_utf8(vc, conv_8bit_to_uni(d));
else if (d < 0x100) else if (d < 0x100)
put_queue(vc, d); put_queue(vc, d);
...@@ -407,7 +417,7 @@ static void fn_enter(struct vc_data *vc) ...@@ -407,7 +417,7 @@ static void fn_enter(struct vc_data *vc)
{ {
if (diacr) { if (diacr) {
if (kbd->kbdmode == VC_UNICODE) if (kbd->kbdmode == VC_UNICODE)
to_utf8(vc, diacr); to_utf8(vc, conv_8bit_to_uni(diacr));
else if (diacr < 0x100) else if (diacr < 0x100)
put_queue(vc, diacr); put_queue(vc, diacr);
diacr = 0; diacr = 0;
...@@ -617,7 +627,7 @@ static void k_unicode(struct vc_data *vc, unsigned int value, char up_flag) ...@@ -617,7 +627,7 @@ static void k_unicode(struct vc_data *vc, unsigned int value, char up_flag)
return; return;
} }
if (kbd->kbdmode == VC_UNICODE) if (kbd->kbdmode == VC_UNICODE)
to_utf8(vc, value); to_utf8(vc, conv_8bit_to_uni(value));
else if (value < 0x100) else if (value < 0x100)
put_queue(vc, value); put_queue(vc, value);
} }
...@@ -775,7 +785,7 @@ static void k_shift(struct vc_data *vc, unsigned char value, char up_flag) ...@@ -775,7 +785,7 @@ static void k_shift(struct vc_data *vc, unsigned char value, char up_flag)
/* kludge */ /* kludge */
if (up_flag && shift_state != old_state && npadch != -1) { if (up_flag && shift_state != old_state && npadch != -1) {
if (kbd->kbdmode == VC_UNICODE) if (kbd->kbdmode == VC_UNICODE)
to_utf8(vc, npadch & 0xffff); to_utf8(vc, npadch);
else else
put_queue(vc, npadch & 0xff); put_queue(vc, npadch & 0xff);
npadch = -1; npadch = -1;
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <linux/kbd_kern.h>
#include <linux/vt_kern.h> #include <linux/vt_kern.h>
#include <linux/consolemap.h> #include <linux/consolemap.h>
#include <linux/selection.h> #include <linux/selection.h>
...@@ -34,6 +35,7 @@ extern void poke_blanked_console(void); ...@@ -34,6 +35,7 @@ extern void poke_blanked_console(void);
/* Variables for selection control. */ /* Variables for selection control. */
/* Use a dynamic buffer, instead of static (Dec 1994) */ /* Use a dynamic buffer, instead of static (Dec 1994) */
struct vc_data *sel_cons; /* must not be deallocated */ struct vc_data *sel_cons; /* must not be deallocated */
static int use_unicode;
static volatile int sel_start = -1; /* cleared by clear_selection */ static volatile int sel_start = -1; /* cleared by clear_selection */
static int sel_end; static int sel_end;
static int sel_buffer_lth; static int sel_buffer_lth;
...@@ -54,10 +56,11 @@ static inline void highlight_pointer(const int where) ...@@ -54,10 +56,11 @@ static inline void highlight_pointer(const int where)
complement_pos(sel_cons, where); complement_pos(sel_cons, where);
} }
static unsigned char static u16
sel_pos(int n) sel_pos(int n)
{ {
return inverse_translate(sel_cons, screen_glyph(sel_cons, n)); return inverse_translate(sel_cons, screen_glyph(sel_cons, n),
use_unicode);
} }
/* remove the current selection highlight, if any, /* remove the current selection highlight, if any,
...@@ -86,8 +89,8 @@ static u32 inwordLut[8]={ ...@@ -86,8 +89,8 @@ static u32 inwordLut[8]={
0xFF7FFFFF /* latin-1 accented letters, not division sign */ 0xFF7FFFFF /* latin-1 accented letters, not division sign */
}; };
static inline int inword(const unsigned char c) { static inline int inword(const u16 c) {
return ( inwordLut[c>>5] >> (c & 0x1F) ) & 1; return c > 0xff || (( inwordLut[c>>5] >> (c & 0x1F) ) & 1);
} }
/* set inwordLut contents. Invoked by ioctl(). */ /* set inwordLut contents. Invoked by ioctl(). */
...@@ -108,13 +111,36 @@ static inline unsigned short limit(const unsigned short v, const unsigned short ...@@ -108,13 +111,36 @@ static inline unsigned short limit(const unsigned short v, const unsigned short
return (v > u) ? u : v; return (v > u) ? u : v;
} }
/*
 * Encode the 16-bit value c as UTF-8 at p.
 *
 * Writes one byte for c < 0x80, two bytes for c < 0x800 and three bytes
 * otherwise, and returns the number of bytes written (1-3).
 */
static int store_utf8(u16 c, char *p)
{
	char *out = p;

	if (c < 0x80) {
		/* 0******* */
		*out++ = c;
		return (int)(out - p);
	}

	if (c >= 0x800) {
		/* 1110**** 10****** 10****** : lead byte plus middle byte */
		*out++ = 0xe0 | (c >> 12);
		*out++ = 0x80 | ((c >> 6) & 0x3f);
	} else {
		/* 110***** 10****** : lead byte */
		*out++ = 0xc0 | (c >> 6);
	}

	/* both multi-byte forms end with the low six bits */
	*out++ = 0x80 | (c & 0x3f);

	return (int)(out - p);
}
/* set the current selection. Invoked by ioctl() or by kernel code. */ /* set the current selection. Invoked by ioctl() or by kernel code. */
int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty) int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty)
{ {
struct vc_data *vc = vc_cons[fg_console].d; struct vc_data *vc = vc_cons[fg_console].d;
int sel_mode, new_sel_start, new_sel_end, spc; int sel_mode, new_sel_start, new_sel_end, spc;
char *bp, *obp; char *bp, *obp;
int i, ps, pe; int i, ps, pe, multiplier;
u16 c;
struct kbd_struct *kbd = kbd_table + fg_console;
poke_blanked_console(); poke_blanked_console();
...@@ -158,6 +184,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t ...@@ -158,6 +184,7 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t
clear_selection(); clear_selection();
sel_cons = vc_cons[fg_console].d; sel_cons = vc_cons[fg_console].d;
} }
use_unicode = kbd && kbd->kbdmode == VC_UNICODE;
switch (sel_mode) switch (sel_mode)
{ {
...@@ -240,7 +267,8 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t ...@@ -240,7 +267,8 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t
sel_end = new_sel_end; sel_end = new_sel_end;
/* Allocate a new buffer before freeing the old one ... */ /* Allocate a new buffer before freeing the old one ... */
bp = kmalloc((sel_end-sel_start)/2+1, GFP_KERNEL); multiplier = use_unicode ? 3 : 1; /* chars can take up to 3 bytes */
bp = kmalloc((sel_end-sel_start)/2*multiplier+1, GFP_KERNEL);
if (!bp) { if (!bp) {
printk(KERN_WARNING "selection: kmalloc() failed\n"); printk(KERN_WARNING "selection: kmalloc() failed\n");
clear_selection(); clear_selection();
...@@ -251,8 +279,12 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t ...@@ -251,8 +279,12 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t
obp = bp; obp = bp;
for (i = sel_start; i <= sel_end; i += 2) { for (i = sel_start; i <= sel_end; i += 2) {
*bp = sel_pos(i); c = sel_pos(i);
if (!isspace(*bp++)) if (use_unicode)
bp += store_utf8(c, bp);
else
*bp++ = c;
if (!isspace(c))
obp = bp; obp = bp;
if (! ((i + 2) % vc->vc_size_row)) { if (! ((i + 2) % vc->vc_size_row)) {
/* strip trailing blanks from line and add newline, /* strip trailing blanks from line and add newline,
......
...@@ -8,9 +8,12 @@ ...@@ -8,9 +8,12 @@
#define IBMPC_MAP 2 #define IBMPC_MAP 2
#define USER_MAP 3 #define USER_MAP 3
#include <linux/types.h>
struct vc_data; struct vc_data;
extern unsigned char inverse_translate(struct vc_data *conp, int glyph); extern u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode);
extern unsigned short *set_translate(int m, struct vc_data *vc); extern unsigned short *set_translate(int m, struct vc_data *vc);
extern int conv_uni_to_pc(struct vc_data *conp, long ucs); extern int conv_uni_to_pc(struct vc_data *conp, long ucs);
extern u32 conv_8bit_to_uni(unsigned char c);
void console_map_init(void); void console_map_init(void);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment