Android WebView to display vowelized Hebrew letters

While developing an e-book reader for books containing Hebrew letters with nekudot (special symbols representing vowels), I ran into a problem: Android versions lower than 3.0 do not correctly display Hebrew text with vowels.

I found the following solution to this issue on the AnkiDroid help forum.
I modified the Java class attached there as follows:

package com.ichi2.libanki.hooks;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import android.util.Log;

/**
 * Text filter that rewrites Hebrew text so that it can be shown on platforms whose
 * rendering of Hebrew with vowel marks is broken.
 *
 * <p>{@link #applyFixForHebrew(String)} finds every substring matched by
 * {@code sHebrewPatternAll} and replaces it with the same characters in reverse order.
 * {@link #translate(String)} is an optional extra step that shifts Hebrew code units into
 * other code ranges; it is not called by {@code applyFixForHebrew}.
 */
public class HebrewFixFilter {
    /** Log tag; currently unused because no logging is performed by this class. */
    private static final String TAG = "HebrewFix";

    /**
     * Regex pattern for identifying whole Hebrew runs so they can be reversed.
     * Currently unused; kept as an alternative to {@link #sHebrewPatternAll}.
     */
    private static final Pattern sHebrewPattern = Pattern.compile(
            // Two cases caught below:
            // Either a series of characters, starting from a hebrew character...
            "([[\\u0591-\\u05F4][\\uFB1D-\\uFB4F]]" +
            // ...followed by hebrew characters, punctuation, parenthesis, spaces, numbers or numerical symbols...
            "[[\\u0591-\\u05F4][\\uFB1D-\\uFB4F],.?!;:\"'\\[\\](){}+\\-*/%=0-9\\s]*" +
            // ...and ending with hebrew character, punctuation or numerical symbol
            "[[\\u0591-\\u05F4][\\uFB1D-\\uFB4F],.?!;:0-9%])|" +
            // or just a single Hebrew character
            "([[\\u0591-\\u05F4][\\uFB1D-\\uFB4F]])");

    /** Regex pattern matching a single Hebrew point/accent character. Currently unused. */
    private static final Pattern sHebrewVowelsPattern = Pattern.compile(
            "[[\\u0591-\\u05BD][\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7]]");

    /**
     * Pattern actually used by {@link #applyFixForHebrew(String)}. It matches exactly two
     * characters: a Hebrew character (U+0591-U+05F4 or U+FB1D-U+FB4F) followed by one
     * character from the second class (marks in U+0591-U+05BD, a few further Hebrew
     * characters, or one of {@code , ? ! ; : ' %} and the digits 0-9).
     * According to the original author this pattern suits Biblical text fully.
     */
    private static final Pattern sHebrewPatternAll = Pattern.compile(
            "([[\\u0591-\\u05F4][\\uFB1D-\\uFB4F]]" +
            "[[\\u0591-\\u05BD\\u05D4\\u05DD\\u05DC][\\u05B8\\u05B3\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7],?!;:'0-9%])");

    /**
     * Reverses every substring of {@code text} matched by {@code sHebrewPatternAll},
     * leaving all other characters untouched.
     *
     * @param text the text to process; may be {@code null}
     * @return the processed text, or {@code text} itself when it is {@code null} or empty
     */
    public String applyFixForHebrew(String text) {
        if (text == null || text.length() == 0) {
            return text;
        }

        Matcher m = sHebrewPatternAll.matcher(text);
        // Matcher.appendReplacement/appendTail require a StringBuffer on older platforms.
        StringBuffer sb = new StringBuffer(text.length());

        while (m.find()) {
            String reversed = new StringBuilder(m.group()).reverse().toString();
            // Quote the replacement so that '$' or '\' in matched text can never be
            // interpreted as a group reference or escape, even if the pattern is widened.
            m.appendReplacement(sb, Matcher.quoteReplacement(reversed));
        }
        m.appendTail(sb);

        return sb.toString();
    }

    /**
     * Translates sections of Hebrew (RTL) unicode into western locations to bypass the
     * flakey BiDi algorithm. Each character inside one of the handled ranges is shifted
     * down by a fixed, range-specific offset; every other character is copied as is.
     *
     * <p>All handled ranges lie in the Basic Multilingual Plane and outside the surrogate
     * range, so the text is processed one {@code char} at a time. An earlier branch that
     * tested for code point 1114131 was removed: that value is greater than
     * {@link Character#MAX_CODE_POINT} (1114111), so the branch could never be taken.
     *
     * @param text Hebrew text; may be {@code null}
     * @return text in the Western (LTR) alphabet and punctuation range, starting in the
     *         extended range, or {@code null} when {@code text} is {@code null}
     */
    String translate(String text) {
        if (text == null) {
            return null;
        }

        StringBuilder sb = new StringBuilder(text.length());

        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);

            if (c >= 1424 && c < 1536) { // U+0590..U+05FF: Hebrew letters and punctuation
                sb.append((char) (c - 400));

            } else if (c >= 64281 && c < 64336) { // U+FB19..U+FB4F: Hebrew compound forms and ligatures
                sb.append((char) (c - 63138));

            } else if (c >= 59393 && c < 59398) { // U+E801..U+E805: some characters from the Private Use Area
                sb.append((char) (c - 58257));

            } else if (c >= 59408 && c < 59410) { // U+E810..U+E811: some more characters from the Private Use Area
                sb.append((char) (c - 58267));

            } else { // anything not in range, leave as it is
                sb.append(c);
            }
        }
        return sb.toString();
    }

}

To use this class in your code, do the following:

// Input text that may contain Hebrew characters (placeholder value).
String hebText = "some text in hebrew";
// The filter is used through an instance; applyFixForHebrew is an instance method.
HebrewFixFilter fixHeb = new HebrewFixFilter();
// Returns a new string in which every pattern match has been reversed.
String output = fixHeb.applyFixForHebrew(hebText);

Useful link:
Mapping table for Hebrew letters and symbols

Update:
reworked pattern.

// Reworked replacement for the sHebrewPatternAll field of HebrewFixFilter.
// The pattern is one capturing group made of three identical character classes, so it
// matches exactly three consecutive characters, each taken from the union of
// U+0591-U+05F4, U+FB1D-U+FB4F (listed twice) and U+0591-U+05C7.
// NOTE(review): U+0591-U+05C7 is already inside U+0591-U+05F4 and the second
// U+FB1D-U+FB4F repeats the first, so those sub-ranges look redundant - confirm before simplifying.
private static final Pattern sHebrewPatternAll = Pattern.compile(
    		"([[[\\u0591-\\u05F4][\\uFB1D-\\uFB4F][\\uFB1D-\\uFB4F]][\\u0591-\\u05C7]]"+
    		"[[[\\u0591-\\u05F4][\\uFB1D-\\uFB4F][\\uFB1D-\\uFB4F]][\\u0591-\\u05C7]]"+
    		"[[[\\u0591-\\u05F4][\\uFB1D-\\uFB4F][\\uFB1D-\\uFB4F]][\\u0591-\\u05C7]])"
    		);

Leave a Reply

%d bloggers like this: