While developing an e-book reader for books containing Hebrew letters with nekudot (diacritical marks that represent vowels), I ran into a problem: Android versions lower than 3.0 do not display Hebrew text with vowel marks correctly.
I found a solution for this issue on the AnkiDroid help forum.
I modified the Java class attached there; my version is below:
package com.ichi2.libanki.hooks;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import android.util.Log;
/**
 * Display work-around for Hebrew text with vowel points on platforms that
 * render such text incorrectly.
 *
 * <p>{@link #applyFixForHebrew(String)} finds every two-character sequence
 * matched by {@code sHebrewPatternAll} and swaps the two characters.
 * {@link #translate(String)} is an optional second step that shifts Hebrew
 * code points into a left-to-right range; it is not called by
 * {@code applyFixForHebrew}.
 */
public class HebrewFixFilter {
    private static final String TAG = "HebrewFix";

    /**
     * Regex pattern used in identifying Hebrew runs so they can be reversed.
     * Not referenced by the methods of this class.
     */
    private static final Pattern sHebrewPattern = Pattern.compile(
            // Two cases caught below:
            // Either a series of characters, starting from a hebrew character...
            "([[\\u0591-\\u05F4][\\uFB1D-\\uFB4F]]" +
            // ...followed by hebrew characters, punctuation, parenthesis, spaces, numbers or numerical symbols...
            "[[\\u0591-\\u05F4][\\uFB1D-\\uFB4F],.?!;:\"'\\[\\](){}+\\-*/%=0-9\\s]*" +
            // ...and ending with hebrew character, punctuation or numerical symbol
            "[[\\u0591-\\u05F4][\\uFB1D-\\uFB4F],.?!;:0-9%])|" +
            // or just a single Hebrew character
            "([[\\u0591-\\u05F4][\\uFB1D-\\uFB4F]])");

    /**
     * Matches a single Hebrew accent or point: U+0591..U+05BD, U+05BF, U+05C1,
     * U+05C2, U+05C4, U+05C5 or U+05C7. Not referenced by the methods of this class.
     */
    private static final Pattern sHebrewVowelsPattern = Pattern.compile(
            "[[\\u0591-\\u05BD][\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7]]");

    /**
     * Matches exactly two characters: a character from U+0591..U+05F4 or
     * U+FB1D..U+FB4F, followed by one character that is either in
     * U+0591..U+05BD, one of U+05D4, U+05DD, U+05DC, U+05B8, U+05B3, U+05BF,
     * U+05C1, U+05C2, U+05C4, U+05C5, U+05C7, or one of {@code , ? ! ; : ' %}
     * or an ASCII digit.
     */
    private static final Pattern sHebrewPatternAll = Pattern.compile(
            //This pattern suits Biblical text fully
            "([[\\u0591-\\u05F4][\\uFB1D-\\uFB4F]]" +
            "[[\\u0591-\\u05BD\\u05D4\\u05DD\\u05DC][\\u05B8\\u05B3\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7],?!;:'0-9%])");

    /**
     * Reverses every substring of {@code text} matched by
     * {@code sHebrewPatternAll}; all other characters are copied unchanged.
     *
     * @param text text that may contain Hebrew; may be {@code null} or empty
     * @return the text with each matched sequence reversed; {@code null} or
     *         the empty string is returned as-is
     */
    public String applyFixForHebrew(String text) {
        if (text == null || text.length() == 0) {
            return text;
        }
        Matcher matcher = sHebrewPatternAll.matcher(text);
        // appendReplacement/appendTail require a StringBuffer on older Java/Android runtimes.
        StringBuffer result = new StringBuffer(text.length());
        while (matcher.find()) {
            String reversed = new StringBuilder(matcher.group()).reverse().toString();
            // Quote the replacement so that '$' and '\' in matched text can never
            // be interpreted as group references if the pattern is widened later.
            matcher.appendReplacement(result, Matcher.quoteReplacement(reversed));
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /**
     * Translates sections of Hebrew (RTL) unicode into western locations to
     * bypass the flakey BiDi algorithm.
     *
     * <p>Each mapped source range is shifted by a fixed offset so that the
     * results form one contiguous run starting at U+0400:
     * <ul>
     *   <li>U+0590..U+05FF (1424..1535) becomes U+0400..U+046F</li>
     *   <li>U+E801..U+E805 (59393..59397) becomes U+0470..U+0474</li>
     *   <li>U+E810..U+E811 (59408..59409) becomes U+0475..U+0476</li>
     *   <li>U+FB19..U+FB4F (64281..64335) becomes U+0477..U+04AD</li>
     * </ul>
     * Every other character, including surrogate pairs, is copied unchanged.
     *
     * @param text Hebrew text; may be {@code null}
     * @return text in the Western (LTR) alphabet and punctuation range,
     *         starting in the extended range, or {@code null} for {@code null} input
     */
    String translate(String text) {
        if (text == null) {
            return null;
        }
        StringBuilder out = new StringBuilder(text.length());
        // All mapped ranges lie in the Basic Multilingual Plane, so walking the
        // string char by char is sufficient and leaves surrogates untouched.
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c >= 1424 && c < 1536) { // Hebrew letters and punctuation
                out.append((char) (c - 400));
            } else if (c >= 64281 && c < 64336) { // Hebrew compound forms and ligatures
                out.append((char) (c - 63138));
            } else if (c >= 59393 && c < 59398) { // Some characters from the Private Use Area
                out.append((char) (c - 58257));
            } else if (c >= 59408 && c < 59410) { // Some more characters from the Private Use Area
                out.append((char) (c - 58267));
            } else { // anything not in range, leave as it is
                // A former "straggler" case tested for code point 1114131, which is
                // above Character.MAX_CODE_POINT (1114111) and could never match,
                // so no mapping exists for it here.
                out.append(c);
            }
        }
        return out.toString();
    }
}
To use this class in your code, do the following:
// Usage: create a filter instance and pass the Hebrew text through
// applyFixForHebrew; the returned string is the one to display.
String hebText = "some text in hebrew"; HebrewFixFilter fixHeb = new HebrewFixFilter(); String output = fixHeb.applyFixForHebrew(hebText);
Useful link:
Mapping table for Hebrew letters and symbols
Update:
I reworked the pattern; the new definition is:
// Reworked pattern: matches exactly three consecutive characters, captured
// together as group 1. Each of the three positions uses the same character
// class: the union of U+0591..U+05F4, U+FB1D..U+FB4F (listed twice) and
// U+0591..U+05C7. The repeated presentation-form range and the U+0591..U+05C7
// range are redundant, since both are already covered by the other ranges.
private static final Pattern sHebrewPatternAll = Pattern.compile( "([[[\\u0591-\\u05F4][\\uFB1D-\\uFB4F][\\uFB1D-\\uFB4F]][\\u0591-\\u05C7]]"+ "[[[\\u0591-\\u05F4][\\uFB1D-\\uFB4F][\\uFB1D-\\uFB4F]][\\u0591-\\u05C7]]"+ "[[[\\u0591-\\u05F4][\\uFB1D-\\uFB4F][\\uFB1D-\\uFB4F]][\\u0591-\\u05C7]])" );