Commit 4bc4634e authored by Samuel Thibault's avatar Samuel Thibault Committed by Greg Kroah-Hartman
Browse files

speakup: Turn i18n files utf-8



The i18n files currently assume latin1 encoding, which is not enough for most
languages.

This separates out the utf-8 processing of /dev/synthu, and uses it for
a new synth_writeu, which we make synth_printf now use. This has the
effect of making all the i18n messages processed in utf-8.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20240327115051.ng7xqnhozyii4ik2@begin


Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent fec50db7
Loading
Loading
Loading
Loading
+13 −46
Original line number Diff line number Diff line
@@ -39,13 +39,13 @@ static ssize_t speakup_file_write(struct file *fp, const char __user *buffer,
static ssize_t speakup_file_writeu(struct file *fp, const char __user *buffer,
				   size_t nbytes, loff_t *ppos)
{
	size_t count = nbytes, want;
	size_t count = nbytes, consumed, want;
	const char __user *ptr = buffer;
	size_t bytes;
	unsigned long flags;
	unsigned char buf[256];
	u16 ubuf[256];
	size_t in, in2, out;
	size_t in, out;

	if (!synth)
		return -ENODEV;
@@ -58,57 +58,24 @@ static ssize_t speakup_file_writeu(struct file *fp, const char __user *buffer,
			return -EFAULT;

		/* Convert to u16 */
		for (in = 0, out = 0; in < bytes; in++) {
			unsigned char c = buf[in];
			int nbytes = 8 - fls(c ^ 0xff);
			u32 value;

			switch (nbytes) {
			case 8: /* 0xff */
			case 7: /* 0xfe */
			case 1: /* 0x80 */
				/* Invalid, drop */
				goto drop;

			case 0:
				/* ASCII, copy */
				ubuf[out++] = c;
				continue;
		for (in = 0, out = 0; in < bytes; in += consumed) {
			s32 value;

			default:
				/* 2..6-byte UTF-8 */
			value = synth_utf8_get(buf + in, bytes - in, &consumed, &want);
			if (value == -1) {
				/* Invalid or incomplete */

				if (bytes - in < nbytes) {
				if (want > bytes - in)
					/* We don't have it all yet, stop here
					 * and wait for the rest
					 */
					bytes = in;
					want = nbytes;
					continue;
				}

				/* First byte */
				value = c & ((1u << (7 - nbytes)) - 1);

				/* Other bytes */
				for (in2 = 2; in2 <= nbytes; in2++) {
					c = buf[in + 1];
					if ((c & 0xc0) != 0x80)	{
						/* Invalid, drop the head */
						want = 1;
						goto drop;
					}
					value = (value << 6) | (c & 0x3f);
					in++;
				continue;
			}

			if (value < 0x10000)
				ubuf[out++] = value;
				want = 1;
				break;
			}
drop:
			/* empty statement */;
		}

		count -= bytes;
+2 −0
Original line number Diff line number Diff line
@@ -76,7 +76,9 @@ int speakup_paste_selection(struct tty_struct *tty);
void speakup_cancel_paste(void);
void speakup_register_devsynth(void);
void speakup_unregister_devsynth(void);
s32 synth_utf8_get(const char *buf, size_t count, size_t *consumed, size_t *want);
void synth_write(const char *buf, size_t count);
void synth_writeu(const char *buf, size_t count);
int synth_supports_indexing(void);

extern struct vc_data *spk_sel_cons;
+87 −5
Original line number Diff line number Diff line
@@ -217,10 +217,95 @@ void synth_write(const char *_buf, size_t count)
	synth_start();
}

/* Consume one utf-8 character from buf (that contains up to count bytes).
 *
 * Returns the decoded value if the sequence is structurally well-formed,
 * -1 otherwise. In all cases, *consumed is set to the number of bytes the
 * caller should skip, and *want to the minimum number of bytes that would be
 * needed to decode the next character:
 *
 * - ASCII byte or complete multi-byte sequence: returns the value,
 *   *consumed is the sequence length, *want is 1.
 * - Invalid lead byte (a lone continuation byte, 0xfe or 0xff): returns -1,
 *   *consumed is 1, *want is 1.
 * - Invalid continuation byte at index i: returns -1, *consumed is i so that
 *   the offending byte gets re-examined as a potential lead byte, *want is 1.
 * - Truncated sequence (count is smaller than the sequence length, which
 *   includes count == 0): returns -1, *consumed is 0, *want is the number of
 *   bytes the sequence needs.
 *
 * Only the byte structure is checked: 2- to 6-byte sequences are accepted, and
 * overlong forms or surrogate values are not rejected here.
 */
s32 synth_utf8_get(const char *buf, size_t count, size_t *consumed, size_t *want)
{
	unsigned char c;
	int nbytes;
	u32 value;
	size_t i;

	if (!count) {
		/* Nothing to decode: do not touch buf[0], ask for one byte */
		*consumed = 0;
		*want = 1;
		return -1;
	}

	c = buf[0];
	/* Number of leading 1 bits in the first byte */
	nbytes = 8 - fls(c ^ 0xff);

	switch (nbytes) {
	case 8: /* 0xff */
	case 7: /* 0xfe */
	case 1: /* 0x80 */
		/* Invalid, drop */
		*consumed = 1;
		*want = 1;
		return -1;

	case 0:
		/* ASCII, take as such */
		*consumed = 1;
		*want = 1;
		return c;

	default:
		/* 2..6-byte UTF-8 */

		if (count < (size_t)nbytes) {
			/* We don't have it all */
			*consumed = 0;
			*want = nbytes;
			return -1;
		}

		/* First byte: keep the payload bits below the length prefix */
		value = c & ((1u << (7 - nbytes)) - 1);

		/* Other bytes: each must be 10xxxxxx and brings 6 more bits */
		for (i = 1; i < (size_t)nbytes; i++) {
			c = buf[i];
			if ((c & 0xc0) != 0x80)	{
				/* Invalid, drop the head */
				*consumed = i;
				*want = 1;
				return -1;
			}
			value = (value << 6) | (c & 0x3f);
		}

		*consumed = nbytes;
		*want = 1;
		return value;
	}
}

/* Queue a utf-8 encoded buffer for the synthesizer.
 *
 * Decodes buf (count bytes) one character at a time with synth_utf8_get() and
 * passes every decoded value below 0x10000 to synth_buffer_add(); larger
 * values are skipped. Invalid bytes are dropped. A sequence truncated by the
 * end of the buffer ends the processing, since there is no later call that
 * could provide the remaining bytes. synth_start() is called once at the end
 * in all cases.
 */
void synth_writeu(const char *buf, size_t count)
{
	size_t i, consumed, want;

	/* Convert to u16, stepping over exactly what the decoder consumed */
	for (i = 0; i < count; i += consumed) {
		s32 value;

		value = synth_utf8_get(buf + i, count - i, &consumed, &want);
		if (value == -1) {
			/* Invalid or incomplete */

			if (want > count - i || !consumed)
				/* We don't have it all (or cannot make
				 * progress), stop
				 */
				break;

			continue;
		}

		if (value < 0x10000)
			synth_buffer_add(value);
	}

	synth_start();
}

void synth_printf(const char *fmt, ...)
{
	va_list args;
	unsigned char buf[160], *p;
	unsigned char buf[160];
	int r;

	va_start(args, fmt);
@@ -229,10 +314,7 @@ void synth_printf(const char *fmt, ...)
	if (r > sizeof(buf) - 1)
		r = sizeof(buf) - 1;

	p = buf;
	while (r--)
		synth_buffer_add(*p++);
	synth_start();
	synth_writeu(buf, r);
}
EXPORT_SYMBOL_GPL(synth_printf);