使用 javascript 函数将西里尔字母音译为拉丁语

声明:本页面是StackOverFlow热门问题的中英对照翻译,遵循CC BY-SA 4.0协议,如果您需要使用它,必须同样遵循CC BY-SA许可,注明原文地址和作者信息,同时你必须将它归于原作者(不是我):StackOverFlow 原文地址: http://stackoverflow.com/questions/11404047/
Warning: these are provided under cc-by-sa 4.0 license. You are free to use/share it, But you must attribute it to the original authors (not me): StackOverFlow

提示:将鼠标放在中文语句上可以显示对应的英文。显示中英文
时间:2020-10-26 13:04:07  来源:igfitidea点击:

transliterating cyrillic to latin with javascript function

javascript transliteration

提问by kyng

I made this function:

我做了这个功能:

/**
 * The question's attempt at transliterating the Cyrillic letters of `word`
 * to Latin through the lookup table A.
 *
 * NOTE(review): kept exactly as posted; the problems pointed out in the
 * comments below are the ones the question is asking about.
 */
function transliterate(word){

    var answer = "";

    // A (and i further down) are assigned without var, so they are not local to this function.
    A = new Array();
    A["Ё"]="YO";A["Й"]="I";A["Ц"]="TS";A["У"]="U";A["К"]="K";A["Е"]="E";A["Н"]="N";A["Г"]="G";A["Ш"]="SH";A["Щ"]="SCH";A["З"]="Z";A["Х"]="H";A["Ъ"]="'";
    A["ё"]="yo";A["й"]="i";A["ц"]="ts";A["у"]="u";A["к"]="k";A["е"]="e";A["н"]="n";A["г"]="g";A["ш"]="sh";A["щ"]="sch";A["з"]="z";A["х"]="h";A["ъ"]="'";
    A["Ф"]="F";A["Ы"]="I";A["В"]="V";A["А"]="A";A["П"]="P";A["Р"]="R";A["О"]="O";A["Л"]="L";A["Д"]="D";A["Ж"]="ZH";A["Э"]="E";
    A["ф"]="f";A["ы"]="i";A["в"]="v";A["а"]="a";A["п"]="p";A["р"]="r";A["о"]="o";A["л"]="l";A["д"]="d";A["ж"]="zh";A["э"]="e";
    A["Я"]="YA";A["Ч"]="CH";A["С"]="S";A["М"]="M";A["И"]="I";A["Т"]="T";A["Ь"]="'";A["Б"]="B";A["Ю"]="YU";
    A["я"]="ya";A["ч"]="ch";A["с"]="s";A["м"]="m";A["и"]="i";A["т"]="t";A["ь"]="'";A["б"]="b";A["ю"]="yu";

// for..in enumerates the index keys of the string.
for (i in word){

    // A missing entry yields the value undefined, which never strictly equals
    // the string 'undefined', so this branch is never taken.
    if (A[word[i]] === 'undefined'){
        answer += word[i];
        }
    else {
        answer += A[word[i]];
        }

// This return sits inside the loop body, so the function exits after the first character.
return answer;
}
}

Now it should transliterate Cyrillic text to Latin and let Latin text simply pass through. But it only manages to transliterate the first letter, and for Latin input it gives undefined as the answer. Could anyone give me an idea of what I am doing wrong?

现在它应该将西里尔文本音译为拉丁文,并让拉丁文原样通过。但它只能转写第一个字母,而对于拉丁文输入,它给出的结果是 undefined。谁能告诉我,我做错了什么?

回答by Split Your Infinity

Couple of things...

几件事...

  1. Use undefined instead of 'undefined'
  2. Don't put the return in the loop
  3. Use hasOwnProperty to filter out functions and properties on the prototype
  4. Use [] instead of new Array()
  5. Use an {} instead of an []
  6. Use lower case variables instead of upper case. Uppercase is reserved for constructors
  1. 使用 undefined 而不是 'undefined'
  2. 不要将返回值放入循环中
  3. 使用 hasOwnProperty 过滤掉原型上的函数和属性
  4. 使用 [] 而不是 new Array()
  5. 使用 {} 而不是 []
  6. 使用小写变量而不是大写。大写保留给构造函数

Here is the code

这是代码

/**
 * Transliterates the Russian Cyrillic letters in `word` to Latin, copying
 * every other character through unchanged.
 *
 * @param {string} word Text to transliterate.
 * @returns {string} The transliterated text.
 */
function transliterate(word){
    var answer = ""
      , a = {}
      , i
      , ch;

   a["Ё"]="YO";a["Й"]="I";a["Ц"]="TS";a["У"]="U";a["К"]="K";a["Е"]="E";a["Н"]="N";a["Г"]="G";a["Ш"]="SH";a["Щ"]="SCH";a["З"]="Z";a["Х"]="H";a["Ъ"]="'";
   a["ё"]="yo";a["й"]="i";a["ц"]="ts";a["у"]="u";a["к"]="k";a["е"]="e";a["н"]="n";a["г"]="g";a["ш"]="sh";a["щ"]="sch";a["з"]="z";a["х"]="h";a["ъ"]="'";
   // Uppercase "А" must stay uppercase in the output.
   a["Ф"]="F";a["Ы"]="I";a["В"]="V";a["А"]="A";a["П"]="P";a["Р"]="R";a["О"]="O";a["Л"]="L";a["Д"]="D";a["Ж"]="ZH";a["Э"]="E";
   a["ф"]="f";a["ы"]="i";a["в"]="v";a["а"]="a";a["п"]="p";a["р"]="r";a["о"]="o";a["л"]="l";a["д"]="d";a["ж"]="zh";a["э"]="e";
   // "Я" is written "YA" to match the all-caps digraphs "YO" and "YU".
   a["Я"]="YA";a["Ч"]="CH";a["С"]="S";a["М"]="M";a["И"]="I";a["Т"]="T";a["Ь"]="'";a["Б"]="B";a["Ю"]="YU";
   a["я"]="ya";a["ч"]="ch";a["с"]="s";a["м"]="m";a["и"]="i";a["т"]="t";a["ь"]="'";a["б"]="b";a["ю"]="yu";

   // Index loop with charAt instead of for..in: i is a declared local and
   // only the characters of the string are visited.
   for (i = 0; i < word.length; i++){
     ch = word.charAt(i);
     // hasOwnProperty keeps members inherited from Object.prototype out of the lookup.
     if (Object.prototype.hasOwnProperty.call(a, ch)) {
       answer += a[ch];
     } else {
       answer += ch;
     }
   }
   return answer;
}

More functional one is like this...

更实用的是这样的...

// Lookup table from Russian Cyrillic letters to their Latin transliteration.
var a = {
  "Ё":"YO","Й":"I","Ц":"TS","У":"U","К":"K","Е":"E","Н":"N","Г":"G","Ш":"SH","Щ":"SCH","З":"Z","Х":"H","Ъ":"'",
  "ё":"yo","й":"i","ц":"ts","у":"u","к":"k","е":"e","н":"n","г":"g","ш":"sh","щ":"sch","з":"z","х":"h","ъ":"'",
  // Uppercase "А" must stay uppercase in the output.
  "Ф":"F","Ы":"I","В":"V","А":"A","П":"P","Р":"R","О":"O","Л":"L","Д":"D","Ж":"ZH","Э":"E",
  "ф":"f","ы":"i","в":"v","а":"a","п":"p","р":"r","о":"o","л":"l","д":"d","ж":"zh","э":"e",
  // "Я" is written "YA" to match the all-caps digraphs "YO" and "YU".
  "Я":"YA","Ч":"CH","С":"S","М":"M","И":"I","Т":"T","Ь":"'","Б":"B","Ю":"YU",
  "я":"ya","ч":"ch","с":"s","м":"m","и":"i","т":"t","ь":"'","б":"b","ю":"yu"
};

/**
 * Transliterates the Russian Cyrillic letters in `word` to Latin using the
 * table `a`; characters without a table entry are kept as they are.
 *
 * @param {string} word Text to transliterate.
 * @returns {string} The transliterated text.
 */
function transliterate(word){
  return word.split('').map(function (char) {
    // Every table value is a non-empty string, so || only falls back on a miss.
    return a[char] || char;
  }).join("");
}

回答by Ali Mamedov

In my projects I am using this method of transliterating:

在我的项目中,我使用这种音译方法:

/**
 * Transliterates Russian Cyrillic letters in `text` to Latin by running one
 * global regex replacement per letter. Letters are written as \u escapes so
 * the source does not depend on the file's encoding.
 *
 * @param {string} text Text to transliterate.
 * @returns {string} The transliterated text; other characters are unchanged.
 */
var transliterate = function(text) {

    text = text
        .replace(/\u0401/g, 'YO')
        .replace(/\u0419/g, 'I')
        .replace(/\u0426/g, 'TS')
        .replace(/\u0423/g, 'U')
        .replace(/\u041A/g, 'K')
        .replace(/\u0415/g, 'E')
        .replace(/\u041D/g, 'N')
        .replace(/\u0413/g, 'G')
        .replace(/\u0428/g, 'SH')
        .replace(/\u0429/g, 'SCH')
        .replace(/\u0417/g, 'Z')
        .replace(/\u0425/g, 'H')
        // Uppercase hard sign: same apostrophe as lowercase \u044A below.
        .replace(/\u042A/g, "'")
        .replace(/\u0451/g, 'yo')
        .replace(/\u0439/g, 'i')
        .replace(/\u0446/g, 'ts')
        .replace(/\u0443/g, 'u')
        .replace(/\u043A/g, 'k')
        .replace(/\u0435/g, 'e')
        .replace(/\u043D/g, 'n')
        .replace(/\u0433/g, 'g')
        .replace(/\u0448/g, 'sh')
        .replace(/\u0449/g, 'sch')
        .replace(/\u0437/g, 'z')
        .replace(/\u0445/g, 'h')
        .replace(/\u044A/g, "'")
        .replace(/\u0424/g, 'F')
        .replace(/\u042B/g, 'I')
        .replace(/\u0412/g, 'V')
        // Uppercase A must stay uppercase in the output.
        .replace(/\u0410/g, 'A')
        .replace(/\u041F/g, 'P')
        .replace(/\u0420/g, 'R')
        .replace(/\u041E/g, 'O')
        .replace(/\u041B/g, 'L')
        .replace(/\u0414/g, 'D')
        .replace(/\u0416/g, 'ZH')
        .replace(/\u042D/g, 'E')
        .replace(/\u0444/g, 'f')
        .replace(/\u044B/g, 'i')
        .replace(/\u0432/g, 'v')
        .replace(/\u0430/g, 'a')
        .replace(/\u043F/g, 'p')
        .replace(/\u0440/g, 'r')
        .replace(/\u043E/g, 'o')
        .replace(/\u043B/g, 'l')
        .replace(/\u0434/g, 'd')
        .replace(/\u0436/g, 'zh')
        .replace(/\u044D/g, 'e')
        // 'YA' matches the all-caps digraphs 'YO' and 'YU'.
        .replace(/\u042F/g, 'YA')
        .replace(/\u0427/g, 'CH')
        .replace(/\u0421/g, 'S')
        .replace(/\u041C/g, 'M')
        .replace(/\u0418/g, 'I')
        .replace(/\u0422/g, 'T')
        .replace(/\u042C/g, "'")
        .replace(/\u0411/g, 'B')
        .replace(/\u042E/g, 'YU')
        .replace(/\u044F/g, 'ya')
        .replace(/\u0447/g, 'ch')
        .replace(/\u0441/g, 's')
        .replace(/\u043C/g, 'm')
        .replace(/\u0438/g, 'i')
        .replace(/\u0442/g, 't')
        .replace(/\u044C/g, "'")
        .replace(/\u0431/g, 'b')
        .replace(/\u044E/g, 'yu');

    return text;
};

Run this sample to transliterate:

运行此示例进行音译:

// Sample input: all 33 lowercase Russian letters followed by all 33 uppercase ones.
transliterate('абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ');

I have replaced all russian letters with their unicode analogs (each letter begins with \u) to solve problems with encoding in Javascript file.

我已经用它们的 unicode 类似物(每个字母以 \u 开头)替换了所有俄语字母,以解决在 Javascript 文件中编码的问题。

To check the execution speed, I took the best answer to this question and compared it with my example. My method turned out to be several times faster (0.16 ms in Firebug :-).

为了检查执行速度,我选择了这个问题的最佳答案,并将其与我的示例进行了比较。我的方法似乎快了好几倍(Firebug 中为 0.16 毫秒 :-)。

speed comparison in firebug

firebug中的速度比较

回答by kyng

Do not use an array for this task. Do not use for..in to iterate a string. Do not check against the string "undefined". Do not return within the for loop.

不要为此任务使用数组。不要使用 for..in 迭代字符串。不要检查字符串 "undefined"。不要在 for 循环内 return。

/**
 * Converts the Russian Cyrillic letters of a string to Latin equivalents.
 * Any character without an entry in the table is copied through unchanged.
 *
 * @param {string} word Text to transliterate.
 * @returns {string} The transliterated text.
 */
function transliterate(word) {
    var table = {
        "Ё": "YO", "Й": "I", "Ц": "TS", "У": "U", "К": "K", "Е": "E", "Н": "N", "Г": "G", "Ш": "SH", "Щ": "SCH", "З": "Z", "Х": "H", "Ъ": "'",
        "ё": "yo", "й": "i", "ц": "ts", "у": "u", "к": "k", "е": "e", "н": "n", "г": "g", "ш": "sh", "щ": "sch", "з": "z", "х": "h", "ъ": "'",
        "Ф": "F", "Ы": "I", "В": "V", "А": "A", "П": "P", "Р": "R", "О": "O", "Л": "L", "Д": "D", "Ж": "ZH", "Э": "E",
        "ф": "f", "ы": "i", "в": "v", "а": "a", "п": "p", "р": "r", "о": "o", "л": "l", "д": "d", "ж": "zh", "э": "e",
        "Я": "YA", "Ч": "CH", "С": "S", "М": "M", "И": "I", "Т": "T", "Ь": "'", "Б": "B", "Ю": "YU",
        "я": "ya", "ч": "ch", "с": "s", "м": "m", "и": "i", "т": "t", "ь": "'", "б": "b", "ю": "yu"
    };
    var pieces = [];
    var pos = 0;

    while (pos < word.length) {
        var ch = word.charAt(pos);

        // Fall back to the character itself when the table has no entry for it.
        pieces.push(table[ch] || ch);
        pos += 1;
    }

    return pieces.join('');
}

Here is a jsFiddle demonstration.

这是一个 jsFiddle 演示。

回答by T.J. Crowder

Your primary problem is that the return is in the wrong place. It's inside your loop, so it returns on the first iteration. Change it to:

您的主要问题是 return 的位置错误。它在您的循环内,因此在第一次迭代时就返回了。将其更改为:

/**
 * Transliterates the Russian Cyrillic letters in `word` to Latin; characters
 * without an entry in the table are passed through unchanged.
 *
 * @param {string} word Text to transliterate.
 * @returns {string} The transliterated text.
 */
function transliterate(word){

    var answer = "";
    // Declared locally (no implicit globals); a plain object is all a lookup map needs.
    var A = {};
    var i, ch;

    A["Ё"]="YO";A["Й"]="I";A["Ц"]="TS";A["У"]="U";A["К"]="K";A["Е"]="E";A["Н"]="N";A["Г"]="G";A["Ш"]="SH";A["Щ"]="SCH";A["З"]="Z";A["Х"]="H";A["Ъ"]="'";
    A["ё"]="yo";A["й"]="i";A["ц"]="ts";A["у"]="u";A["к"]="k";A["е"]="e";A["н"]="n";A["г"]="g";A["ш"]="sh";A["щ"]="sch";A["з"]="z";A["х"]="h";A["ъ"]="'";
    A["Ф"]="F";A["Ы"]="I";A["В"]="V";A["А"]="A";A["П"]="P";A["Р"]="R";A["О"]="O";A["Л"]="L";A["Д"]="D";A["Ж"]="ZH";A["Э"]="E";
    A["ф"]="f";A["ы"]="i";A["в"]="v";A["а"]="a";A["п"]="p";A["р"]="r";A["о"]="o";A["л"]="l";A["д"]="d";A["ж"]="zh";A["э"]="e";
    A["Я"]="YA";A["Ч"]="CH";A["С"]="S";A["М"]="M";A["И"]="I";A["Т"]="T";A["Ь"]="'";A["Б"]="B";A["Ю"]="YU";
    A["я"]="ya";A["ч"]="ch";A["с"]="s";A["м"]="m";A["и"]="i";A["т"]="t";A["ь"]="'";A["б"]="b";A["ю"]="yu";

    // Index loop with charAt rather than for..in / [] on the string.
    for (i = 0; i < word.length; ++i){
        ch = word.charAt(i);

        // A miss yields the value undefined (not the string 'undefined'),
        // so || falls back to the original character.
        answer += A[ch] || ch;

    }
    return answer; // <=== Was *above* the } on the previous line
}

Note that I've fixed the indentation. Consistent indentation helps you avoid these sorts of bugs.

请注意,我已经修复了缩进。一致的缩进可帮助您避免此类错误。



Note 1: There's nothing about your A object that uses the fact it's an Array. You're just using it as a map. In JavaScript, all objects are maps, so rather than A = new Array(); just use A = {};.

注意 1:您的 A 对象没有任何地方用到它是 Array 这一事实。您只是将其用作映射。在 JavaScript 中,所有对象都是映射,因此与其使用 A = new Array();,不如直接使用 A = {};。

Note 2: A and i are never declared in your function, so you're falling prey to The Horror of Implicit Globals. To fix it, declare them with var.

注意 2:A 和 i 从未在您的函数中声明,因此您会成为“隐式全局变量的恐怖”的牺牲品。要修复它,请使用 var 声明它们。

Note 3: Neither using for..in to loop through the characters of a string, nor using [] to index into the string, is reliable across JavaScript engines. Instead, use for (i = 0; i < word.length; ++i) and then ch = word.charAt(i); to get the character at that position, then use ch in your code within the loop.

注意 3:无论是使用 for..in 循环遍历字符串的字符,还是使用 [] 索引字符串,在各 JavaScript 引擎中都不可靠。应改用 for (i = 0; i < word.length; ++i),然后用 ch = word.charAt(i); 获取该位置的字符,并在循环内的代码中使用 ch。

Note 4: You can use the curiously powerful || operator to shorten your code, e.g.:

注意 4:您可以使用奇妙而强大的 || 运算符来缩短您的代码,例如:

answer += A[ch] || ch;

回答by kinORnirvana

Russian symbols in js don't work at all at my system. I don't know why. So I use the next code for this:

js 中的俄语符号在我的系统中根本不起作用。我不知道为什么。所以我使用下一个代码:

It will not only transliterate but replace all punctuation with '_' and lowercase everything.

它不仅会音译,而且会用“_”替换所有标点符号并小写所有内容。

/**
 * Builds a lowercase Latin slug from Russian text: the input is lowercased,
 * Cyrillic letters are transliterated, spaces and punctuation become '_',
 * runs of consecutive separators collapse into one, and a single leading or
 * trailing '_' is stripped. Characters not in the table are kept as they are.
 *
 * @param {string} str Text to convert.
 * @returns {string} The transliterated slug.
 */
function translit(str){
    var sp = '_';
    var text = str.toLowerCase();
    var transl = {
        '\u0430': 'a', '\u0431': 'b', '\u0432': 'v', '\u0433': 'g', '\u0434': 'd', '\u0435': 'e', '\u0451': 'e', '\u0436': 'zh',
        '\u0437': 'z', '\u0438': 'i', '\u0439': 'j', '\u043a': 'k', '\u043b': 'l', '\u043c': 'm', '\u043d': 'n', '\u043e': 'o',
        '\u043f': 'p', '\u0440': 'r', '\u0441': 's', '\u0442': 't', '\u0443': 'u', '\u0444': 'f', '\u0445': 'h', '\u0446': 'c',
        '\u0447': 'ch', '\u0448': 'sh', '\u0449': 'shch', '\u044a': '\'', '\u044b': 'y', '\u044c': '', '\u044d': 'e', '\u044e': 'yu',
        '\u044f': 'ya',
        '\u00AB': '_', '\u00BB': '_', // « and » quotation marks
        ' ': sp, '_': sp, '`': sp, '~': sp,
        '!': sp, '@': sp, '#': sp, '$': sp,
        '%': sp, '^': sp, '&': sp, '*': sp, '(': sp, ')': sp, '-': sp, '=': sp,
        // The backslash key has to be escaped; a bare '\' swallows the closing quote.
        '+': sp, '[': sp, ']': sp, '\\': sp, '|': sp, '/': sp, '.': sp, ',': sp,
        '{': sp, '}': sp, '\'': sp, '"': sp, ';': sp, ':': sp, '?': sp, '<': sp,
        '>': sp, '№': sp
    };
    var result = '';
    var previous = ''; // last piece appended to result
    var i, ch, mapped;

    for (i = 0; i < text.length; i++) {
        ch = text.charAt(i);
        mapped = transl[ch];
        if (mapped !== undefined) {
            // Skip a separator that directly follows another separator.
            if (previous !== sp || mapped !== sp) {
                result += mapped;
                previous = mapped;
            }
        } else {
            result += ch;
            previous = ch;
        }
    }
    result = result.replace(/^_/, '').replace(/_$/, ''); // trim
    return result;
}

// Transliterate a sample phrase and write the result into the element with id "alias".
var result = translit('Привет Мир!');
document.getElementById('alias').value = result;
<html>
  <body>
    <input name="name" type="text" id="alias" />
  </body>
</html>

The code was originally taken from here: http://ajaxs.ru/lesson/js/137-transliteracija_stroki_na_javascript.html and then refactored.

代码最初取自这里:http://ajaxs.ru/lesson/js/137-transliteracija_stroki_na_javascript.html ,然后经过重构。

回答by kyng

By combining the recommendations of Bart Riemens and T.J. Crowder I came up with this code, which seems to do the trick nicely:

通过结合 Bart Riemens 和 TJ Crowder 的建议,我想出了这个代码,这很好地完成了这个技巧:

 /**
  * Transliterates the Russian Cyrillic letters in `word` to Latin; every
  * character without an entry in the table is copied through unchanged.
  *
  * @param {string} word Text to transliterate.
  * @returns {string} The transliterated text.
  */
 function transliterate(word){

    var answer = "";
    var a = {};
    var i, ch;

    a["Ё"]="YO";a["Й"]="I";a["Ц"]="TS";a["У"]="U";a["К"]="K";a["Е"]="E";a["Н"]="N";a["Г"]="G";a["Ш"]="SH";a["Щ"]="SCH";a["З"]="Z";a["Х"]="H";a["Ъ"]="'";
    a["ё"]="yo";a["й"]="i";a["ц"]="ts";a["у"]="u";a["к"]="k";a["е"]="e";a["н"]="n";a["г"]="g";a["ш"]="sh";a["щ"]="sch";a["з"]="z";a["х"]="h";a["ъ"]="'";
    // Uppercase "А" must stay uppercase in the output.
    a["Ф"]="F";a["Ы"]="I";a["В"]="V";a["А"]="A";a["П"]="P";a["Р"]="R";a["О"]="O";a["Л"]="L";a["Д"]="D";a["Ж"]="ZH";a["Э"]="E";
    a["ф"]="f";a["ы"]="i";a["в"]="v";a["а"]="a";a["п"]="p";a["р"]="r";a["о"]="o";a["л"]="l";a["д"]="d";a["ж"]="zh";a["э"]="e";
    // "Я" is written "YA" to match the all-caps digraphs "YO" and "YU".
    a["Я"]="YA";a["Ч"]="CH";a["С"]="S";a["М"]="M";a["И"]="I";a["Т"]="T";a["Ь"]="'";a["Б"]="B";a["Ю"]="YU";
    a["я"]="ya";a["ч"]="ch";a["с"]="s";a["м"]="m";a["и"]="i";a["т"]="t";a["ь"]="'";a["б"]="b";a["ю"]="yu";

    // i is declared above so the loop counter does not leak as a global.
    for (i = 0; i < word.length; ++i){
        ch = word.charAt(i);

        answer += a[ch] === undefined ? ch : a[ch];
    }
    return answer;
}

Thank you!

谢谢!