Recognize text in images with ML Kit on Android

This post explains how to recognize text in images using the Google ML Kit Text Recognition API in an Android application, with the help of a simple demo app.
In the demo app, we will recognize and extract text from images placed in the app's assets and draw a bounding box around each element (word).
 

ML Kit’s Text Recognition API

ML Kit’s Text Recognition API recognizes and extracts text from images. The Text Recognizer segments the text into blocks (paragraphs or columns), lines, and elements (words).

Creating new project

Create a new project by going to File ⇒ New Android Project, select Empty Activity, provide the app name, set the language to Java, and finally click Finish.

Adding Dependency

Open the build.gradle (Module: app) file and add the ML Kit Text Recognition API dependency inside the dependencies section:

build.gradle

dependencies {
    // ML Kit Text Recognition API used by the demo app.
    implementation('com.google.android.gms:play-services-mlkit-text-recognition:16.0.0')
}
Creating Layout File

The activity_main.xml layout file defines the UI of the application.

activity_main.xml

<?xml version="1.0" encoding="utf-8"?>
<!-- Main screen: the image, an overlay for the bounding boxes, and the "Find Text" button. -->
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <!-- Shows the image; fills the space above the button. -->
    <ImageView
        android:id="@+id/image_view"
        android:layout_width="match_parent"
        android:layout_height="0dp"
        android:scaleType="fitStart"
        app:layout_constraintBottom_toTopOf="@+id/btn_find_text"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toTopOf="parent" />

    <!-- Overlay constrained to all four edges of the ImageView so the boxes line up with the image.
         The RelativeLayout-only attribute layout_alignParentStart was dropped: it has no effect
         inside a ConstraintLayout. -->
    <com.c1ctech.textrecognitionexp.GraphicOverlay
        android:id="@+id/graphic_overlay"
        android:layout_width="match_parent"
        android:layout_height="0dp"
        app:layout_constraintBottom_toBottomOf="@id/image_view"
        app:layout_constraintLeft_toLeftOf="@id/image_view"
        app:layout_constraintRight_toRightOf="@id/image_view"
        app:layout_constraintTop_toTopOf="@id/image_view" />

    <!-- Starts text recognition on the displayed image. -->
    <Button
        android:id="@+id/btn_find_text"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Find Text"
        android:layout_margin="10dp"
        app:layout_constraintBottom_toBottomOf="parent"
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent" />

</androidx.constraintlayout.widget.ConstraintLayout>
Recognize and extract text from image bitmap

To recognize and extract text from the image bitmap, follow the steps below:

  • Prepare the input image using a bitmap.
  • Create a TextRecognizer instance.
  • Process the image.
    • If the text recognition operation succeeds, a Text object is passed to the success listener.
    • In case of an error, an exception is passed to the failure listener.
/**
 * Runs text recognition on the currently selected bitmap.
 *
 * <p>A successful result is forwarded to {@code processTextRecognitionResult}; on failure the
 * exception's stack trace is printed.
 */
private void runTextRecognition() {
    // Wrap the bitmap as an ML Kit input image (second argument: rotation in degrees).
    InputImage inputImage = InputImage.fromBitmap(mSelectedImage, 0);

    TextRecognizer textRecognizer = TextRecognition.getClient();

    // Called with the recognized text when the task completes successfully.
    OnSuccessListener<Text> successListener = new OnSuccessListener<Text>() {
        @Override
        public void onSuccess(Text texts) {
            processTextRecognitionResult(texts);
        }
    };

    // Called with the cause when the task fails.
    OnFailureListener failureListener = new OnFailureListener() {
        @Override
        public void onFailure(@NonNull Exception e) {
            e.printStackTrace();
        }
    };

    textRecognizer.process(inputImage)
            .addOnSuccessListener(successListener)
            .addOnFailureListener(failureListener);
}
Extract text from blocks of recognized text

A Text object contains the full text recognized in the image and zero or more TextBlock objects.

TextBlock – represents a rectangular block of text, which contains zero or more Line objects.

Line – contains zero or more Element objects.

Element – represents a word or a word-like entity such as a date or a number.

/**
 * Draws a bounding box around every recognized element (word) of the given result.
 *
 * <p>Graphics from a previous run are always removed first. If the result contains no text
 * blocks, a "No text found" toast is shown and nothing is drawn.
 *
 * @param texts recognition result delivered to the success listener
 */
private void processTextRecognitionResult(Text texts) {
    // Clear up front so an empty result does not leave stale boxes on screen.
    mGraphicOverlay.clear();

    List<Text.TextBlock> blocks = texts.getTextBlocks();
    if (blocks.isEmpty()) {
        Toast.makeText(getApplicationContext(), "No text found", Toast.LENGTH_SHORT).show();
        return;
    }

    for (Text.TextBlock block : blocks) {
        for (Text.Line line : block.getLines()) {
            for (Text.Element element : line.getElements()) {
                // One graphic per element draws that element's bounding box.
                GraphicOverlay.Graphic textGraphic = new TextGraphic(mGraphicOverlay, element);
                mGraphicOverlay.add(textGraphic);
            }
        }
    }
}
Complete MainActivity Code

MainActivity.java

package com.c1ctech.textrecognitionexp;

import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity;

import android.content.Context;
import android.content.res.AssetManager;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.os.Bundle;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.ImageView;
import android.widget.Toast;

import com.google.android.gms.tasks.OnFailureListener;
import com.google.android.gms.tasks.OnSuccessListener;
import com.google.mlkit.vision.common.InputImage;
import com.google.mlkit.vision.text.Text;
import com.google.mlkit.vision.text.TextRecognition;
import com.google.mlkit.vision.text.TextRecognizer;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;

public class MainActivity extends AppCompatActivity {

private ImageView mImageView;
private Button mFindTextBtn;
private Bitmap mSelectedImage;
private GraphicOverlay mGraphicOverlay;

@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);

mImageView = findViewById(R.id.image_view);

mFindTextBtn = findViewById(R.id.btn_find_text);

mGraphicOverlay = findViewById(R.id.graphic_overlay);

mSelectedImage = getBitmapFromAsset(this, "page.png");
mImageView.setImageBitmap(mSelectedImage);

mFindTextBtn.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
//if bitmap is not null
if (mSelectedImage != null) {
//Creates a new bitmap, scaled from an existing bitmap
Bitmap resizedBitmap = createScaleFactorUsingBitmap(mSelectedImage);
//setting new scaled bitmap in imageview
mImageView.setImageBitmap(resizedBitmap);
mSelectedImage = resizedBitmap;
}
runTextRecognition();
}
});

}

private Bitmap createScaleFactorUsingBitmap(Bitmap mSelectedImage) {
// Determine how much to scale down the image
float scaleFactor =
Math.max(
(float) mSelectedImage.getWidth() / (float) mImageView.getWidth(),
(float) mSelectedImage.getHeight() / (float) mImageView.getHeight());

Bitmap resizedBitmap =
Bitmap.createScaledBitmap(
mSelectedImage,
(int) (mSelectedImage.getWidth() / scaleFactor),
(int) (mSelectedImage.getHeight() / scaleFactor),
true);

return resizedBitmap;
}

//recognize and extract text from image bitmap
private void runTextRecognition() {

//prepare input image using bitmap
InputImage image = InputImage.fromBitmap(mSelectedImage, 0);

//creating TextRecognizer instance
TextRecognizer recognizer = TextRecognition.getClient();

//process the image
recognizer.process(image)
.addOnSuccessListener(
new OnSuccessListener<Text>() {
@Override
public void onSuccess(Text texts) {
//Task completed successfully.
processTextRecognitionResult(texts);
}
})
.addOnFailureListener(
new OnFailureListener() {
@Override
public void onFailure(@NonNull Exception e) {
// Task failed with an exception
e.printStackTrace();
}
});
}


//perform operation on the full text recognized in the image.
private void processTextRecognitionResult(Text texts) {

List<Text.TextBlock> blocks = texts.getTextBlocks();
if (blocks.size() == 0) {
Toast.makeText(getApplicationContext(), "No text found", Toast.LENGTH_SHORT).show();
return;
}
mGraphicOverlay.clear();
for (Text.TextBlock block : texts.getTextBlocks()) {
for (Text.Line line : block.getLines()) {
for (Text.Element element : line.getElements()) {
//Draws the bounding box around the element.
GraphicOverlay.Graphic textGraphic = new TextGraphic(mGraphicOverlay, element);
mGraphicOverlay.add(textGraphic);
}
}
}
}

//get bitmap of image from app assets.
public Bitmap getBitmapFromAsset(Context context, String filePath) {
AssetManager assetManager = context.getAssets();

InputStream is;
Bitmap bitmap = null;
try {
is = assetManager.open(filePath);
bitmap = BitmapFactory.decodeStream(is);
} catch (IOException e) {
e.printStackTrace();
}
return bitmap;
}
}

When you run the app, it will look as shown below:

       

Leave a Reply