# from __future__ import division, print_function
# coding=utf-8
# import sys
import os
# import glob
# import re
import numpy as np
# import datetime
# Keras
# from tensorflow.keras.models import load_model
# from tensorflow.keras.preprocessing import image
# Flask utils
# from flask import Flask, redirect, url_for, request, render_template
# from werkzeug.utils import secure_filename
# from gevent.pywsgi import WSGIServer
# import everything
# from skimage.io import imread, imshow
# from skimage.filters import gaussian, threshold_otsu
# from skimage.feature import canny
# from skimage.transform import probabilistic_hough_line, rotate
# from process_image import process_image
# import glob
# import math
import cv2
# import numpy as np
# from PIL import Image
# from matplotlib import pyplot as plt
# from matplotlib.patches import Rectangle
# %matplotlib inline
# from collections import OrderedDict
# from PIL import Image
# import pandas as pd
# import seaborn as sns
# import math
# import all from Hough transform cell
# from skimage.transform import hough_line, hough_line_peaks
# from skimage.transform import rotate
# from skimage.feature import canny
# from skimage.io import imread
# from skimage.color import rgb2gray
# import matplotlib.pyplot as plt
# from scipy.stats import mode as md
# from myhough import deskew, deskew2
# from segment_words import sortit, words, createk, hpf, bps, wps, baw
# from myverify import verify
# from detect_frame import detect_frame
# import pathlib
from PIL import ImageFont, ImageDraw, Image
font = ImageFont.truetype("kalpurush.ttf", 60)  # Bangla font used to label detections on the output image
# Badge style reference: https://img.shields.io/badge/IEEE-10499463-0072bc.svg
citation_text = """<div>
<br><p>This is a demo space for the paper: <i>Improving Character Recognition in Bangla Handwritten Words: A Two-Stage Single Shot Detector Approach</i>.</p>
<a href='https://ieeexplore.ieee.org/document/10499463' style='text-decoration: none;'>
<img src='https://img.shields.io/static/v1?label=%E2%80%8E&logo=ieee&logoSize=auto&message=10499463&color=0072bc&labelColor=323634' alt='IEEE Badge' style='max-width: 100%; height: auto; margin-bottom: 10px;'>
</a>
<p>Direct Link to the Paper:
<a href='https://ieeexplore.ieee.org/document/10499463' style='text-decoration: none; color: blue;'>https://ieeexplore.ieee.org/document/10499463</a></p>
<p>ResearchGate Link to the Paper:
<a href='https://www.researchgate.net/publication/380009779_Improving_Character_Recognition_in_Bangla_Handwritten_Words_A_Two-Stage_Single_Shot_Detector_Approach' style='text-decoration: none; color: blue;'>https://bit.ly/ResearchGatePaperLink</a></p>
<p>Please cite this paper with BibTeX as follows:</p>
</div>
<div>
<pre><code style="overflow-x: auto;">@inproceedings{pal2024improving,
  title={Improving Character Recognition in Bangla Handwritten Words: A Two-Stage Single Shot Detector Approach},
  author={Pal, Avi and Hasan, Md Sajid and Ahsan, Sk Md Masudul},
  booktitle={2024 International Conference on Advances in Computing, Communication, Electrical, and Smart Systems (iCACCESS)},
  pages={1--6},
  year={2024},
  organization={IEEE}
}
</code></pre>
</div>"""
# import more
import tensorflow as tf
from object_detection.utils import config_util
# from object_detection.protos import pipeline_pb2
# from google.protobuf import text_format
# import os
from object_detection.utils import label_map_util
# from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

# Load pipeline config and build a detection model
WORKSPACE_PATH = 'Tensorflow/workspace'
# SCRIPTS_PATH = 'Tensorflow/scripts'
# APIMODEL_PATH = 'Tensorflow/models'
ANNOTATION_PATH = WORKSPACE_PATH + '/annotations'
# IMAGE_PATH = WORKSPACE_PATH + '/images'
MODEL_PATH = WORKSPACE_PATH + '/models'
PRETRAINED_MODEL_PATH = WORKSPACE_PATH + '/pre-trained-models'
CONFIG_PATH = MODEL_PATH + '/my_ssd_mobnet/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH + '/my_ssd_mobnet/'
# INPUT_IMAGE_PATH = 'Tensorflow/myimages'
# MODEL_PATH = 'E:/RealTimeObjectDetection/model.best.hdf5'
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(model_config=configs['model'], is_training=False)
# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-51')).expect_partial()
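# 'ckpt-51' is the training checkpoint shipped with this Space. To restore a
# different one, point restore() at its prefix ('ckpt-50' below is a
# hypothetical example, not a file guaranteed to exist here):
# ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-50')).expect_partial()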
def detect_fn(image):
    """Run the SSD detector on a preprocessed batch tensor and return raw detections."""
    image, shapes = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    return detections
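
# A minimal usage sketch for detect_fn, kept commented so it does not run at
# import time ('sample.jpg' is a hypothetical path, not shipped with this Space):
# img = cv2.imread('sample.jpg')
# tensor = tf.convert_to_tensor(np.expand_dims(np.array(img), 0), dtype=tf.float32)
# dets = detect_fn(tensor)
# print(int(dets['num_detections']))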

def detect_frame(frame, isRealTime=False):
    """Detect Bangla characters in a word image and return them in reading order."""
    image_np = np.array(frame)
    cpimg = frame.copy()
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)
    print(len(detections))
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections
    row, col, dummy = image_np.shape
    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
    mark = [0] * 15  # keep/discard flag for each of the top detections
    myletters = []
    # Pass 1: greedy overlap suppression over the score-sorted top boxes. A box
    # is kept unless its IoU with an already-kept box is >= 0.5. The kars ি and ী
    # legitimately overlap other characters, so they only suppress their own class.
    # Boxes are [ymin, xmin, ymax, xmax] in normalized coordinates; note that the
    # x* variables below hold the vertical coordinates and y* the horizontal ones.
    for i in range(0, min(15, num_detections)):  # min() guards against <15 detections
        curi = detections['detection_classes'][i]
        classi = classes[curi]
        print(classes[curi], end='-')
        cur = detections['detection_scores'][i]
        if cur < 0.2:  # confidence threshold
            continue
        print(cur, end=' ')
        print(detections['detection_boxes'][i], end=' ')
        x0 = detections['detection_boxes'][i][0]
        y0 = detections['detection_boxes'][i][1]
        x1 = detections['detection_boxes'][i][2]
        y1 = detections['detection_boxes'][i][3]
        curarea = (x1 - x0) * (y1 - y0)
        ok = 1
        for j in range(0, i):
            if mark[j] == 0:
                continue
            curj = detections['detection_classes'][j]
            classj = classes[curj]
            if classi == 'ি' or classj == 'ি':
                if classi != classj:
                    continue
            if classi == 'ী' or classj == 'ী':
                if classi != classj:
                    continue
            x2 = detections['detection_boxes'][j][0]
            y2 = detections['detection_boxes'][j][1]
            x3 = detections['detection_boxes'][j][2]
            y3 = detections['detection_boxes'][j][3]
            # intersection rectangle
            x4 = max(x0, x2)
            y4 = max(y0, y2)
            x5 = min(x1, x3)
            y5 = min(y1, y3)
            if x4 > x5 or y4 > y5:
                continue  # no overlap
            prevarea = (x3 - x2) * (y3 - y2)
            commonarea = (x5 - x4) * (y5 - y4)
            ins1 = curarea / commonarea
            ins2 = prevarea / commonarea
            ins = commonarea / (curarea + prevarea - commonarea)  # IoU
            print(ins1, end=' ')
            if ins >= 0.5:
                ok = 0
                cur = detections['detection_classes'][j]
                print(classes[cur])
                break
        if ok == 1:
            mark[i] = 1
        print(ok)
    # Pass 2 (enabled via the global avver): re-check each surviving base character
    # with the second-stage CNN verifier and adopt its label when it is more confident.
    for i in range(0, min(15, num_detections)):
        if mark[i] == 0 or avver == 0:
            continue
        if detections['detection_classes'][i] > 38:  # skip digits and dependent signs
            continue
        x0 = int(detections['detection_boxes'][i][0] * row)
        y0 = int(detections['detection_boxes'][i][1] * col)
        x1 = int(detections['detection_boxes'][i][2] * row)
        y1 = int(detections['detection_boxes'][i][3] * col)
        currImg = cpimg[x0:x1, y0:y1]
        curscore = detections['detection_scores'][i]
        curclass = detections['detection_classes'][i]
        label, conf = verify(currImg)
        if conf > curscore and ulta[label] != curclass and ulta[label] != -1:
            detections['detection_classes'][i] = ulta[label]
            detections['detection_scores'][i] = conf
    # Pass 3: collect surviving letters as (xmin, char, xmax) and sort left to right.
    for i in range(0, min(15, num_detections)):
        if detections['detection_scores'][i] < 0.2:
            continue
        if mark[i] == 0:
            continue
        cur = detections['detection_classes'][i]
        cur = classes[cur]
        y0 = detections['detection_boxes'][i][1]
        y1 = detections['detection_boxes'][i][3]
        myletters.append((y0, cur, y1))
    myletters.sort(key=lambda x: x[0])
    # ু and ্র attach below/after a consonant: if the letter they overlap most
    # horizontally is not the immediate predecessor, shift the sign one slot right.
    for i in range(len(myletters) - 1, -1, -1):
        y0 = myletters[i][0]
        curr = myletters[i][1]
        y1 = myletters[i][2]
        if curr == 'ু' or curr == '্র':
            mxarea = 0
            mxno = i - 1
            for j in range(0, len(myletters)):
                if i == j:
                    continue
                y2 = myletters[j][0]
                y3 = myletters[j][2]
                curcommon = min(y3, y1) - max(y0, y2)
                if curcommon > mxarea:
                    mxarea = curcommon
                    mxno = j
            if mxno != (i - 1) and i + 1 < len(myletters):  # bounds guard added
                myletters[i], myletters[i + 1] = myletters[i + 1], myletters[i]
    res_list = [x[1] for x in myletters]
    print(res_list)
    # ে and ি are written before their consonant but follow it in logical order.
    for i in range(len(res_list) - 2, -1, -1):
        x = res_list[i]
        if x == 'ে' or x == 'ি':
            res_list[i], res_list[i + 1] = res_list[i + 1], res_list[i]
    # অ followed by া merges into the single letter আ.
    for i in range(len(res_list) - 2, -1, -1):
        x = res_list[i]
        y = res_list[i + 1]
        print(x, y)
        if x == 'অ' and y == 'া':
            res_list[i] = 'আ'
            res_list.pop(i + 1)
    print(res_list)
    for i in res_list:
        print(i, end='')
    print(' ')
    return res_list
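
# detect_frame above is retained from the original page-level pipeline (see the
# commented block inside model_predict below); the Gradio app itself calls
# model_predict, which repeats the same three passes and additionally draws the
# detected boxes on the canvas.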
# Define a flask app
# app = Flask(__name__)
# Model saved with Keras model.save()
# Load your trained model
# model = load_model(MODEL_PATH)
# model._make_predict_function()  # Necessary
# print('Model loaded. Start serving...')
# You can also use pretrained model from Keras
# Check https://keras.io/applications/
# from keras.applications.resnet50 import ResNet50
# model = ResNet50(weights='imagenet')
# model.save('')
# print('Model loaded. Check http://127.0.0.1:5000/')
avver = 0  # 1 enables the second-stage verifier pass (requires myverify.verify)
clicked = 1
wp = None
bp = None
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH + '/label_map.pbtxt')
classes = ['অ','ই','উ','এ','ও','ক','খ','গ','ঘ','চ','ছ','জ','ঝ','ট','ঠ','ড','ত','থ','দ','ধ','ন','প','ফ','ব','ভ','ম','য','র','ল','শ','ষ','স','হ','ড়','য়','ৎ','ং','ঁ','০','১','২','৩','৪','৫','৭','৮','া','ি','ী','ে','ু','্র','্য']
labels = [1,2,4,7,9,11,12,13,14,16,17,18,19,21,22,23,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50,51,52,53,54,55,57,58,60,61,62,63,64,66,67]
ulta = [0,-1,1,-1,2,-1,-1,3,-1,4,-1,5,6,7,8,-1,9,10,11,12,-1,13,14,15,-1,-1,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,-1,34,35,36,-1,37,38,39,40,41,42,43,-1,44,45,-1,46,47,48,49,50,-1,51,52]
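# Best-effort reading of the lookup tables above: `classes` holds the 53 detector
# classes, `labels` their ids in label_map.pbtxt, and `ulta` appears to map the
# CNN verifier's label ids back to indices into `classes` (-1 = no counterpart).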

def model_predict(word):
    """Gradio handler: take a gr.Paint canvas, run detection, and return (text, annotated image)."""
    # img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    '''
    if clicked==1:
        bp = 66
        wp = 160
    mode = "GCMODE"
    if mode == "GCMODE":
        img = hpf(img, kSize=51)
        wp = 127
        img = wps(img, wp)
        img = bps(img)
    elif mode == "RMODE":
        bps()
        wps()
    elif mode == "SMODE":
        bps()
        wps()
        baw()
    img = cv2.fastNlMeansDenoising(img, img, 50.0, 7, 21)
    print("\ndone.")
    xs = img.shape
    if len(xs)==3:
        img = img[:,:,0]
    img = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
    angle = deskew(img)
    if angle!=0:
        img = deskew2(img, angle)
    ho, wo = img.shape
    area = ho*wo
    ara = words(img, 25, 11, 7, area/5000)
    ara.reverse()
    # cv2.imshow('input image', img)
    sz = len(ara)
    for i in range(0, sz):
        ara[i] = sorted(ara[i], key=lambda entry: entry[0][0])
    cnt2 = 0
    files = glob.glob('Tensorflow/myimages/*')
    for f in files:
        os.remove(f)
    for i in range(0, sz):
        tmp = ara[i]
        sz2 = len(tmp)
        if i%10==0:
            cnt2 = cnt2+1
        for j in range(0, sz2):
            a, b = tmp[j]
            b = cv2.adaptiveThreshold(b,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,11,2)
            if j<10:
                cnt3 = 0
            elif j<20:
                cnt3 = 1
            else:
                cnt3 = 2
            cv2.imwrite('Tensorflow/myimages/ocr %d%d%d%d.jpg' % (cnt2,i,cnt3,j), b)
            # cv2.imshow('Crop %d%d' % (i,j), b)
            cv2.waitKey(0)
    PATH_TO_TEST_IMAGES_DIR = pathlib.Path('Tensorflow/myimages')
    TEST_IMAGE_PATHS = (list(PATH_TO_TEST_IMAGES_DIR.glob("*.jpg")) + list(PATH_TO_TEST_IMAGES_DIR.glob("*.jpeg")))  # +list(PATH_TO_TEST_IMAGES_DIR.glob("*.png"))
    print(len(TEST_IMAGE_PATHS))
    final = []
    for image_path in TEST_IMAGE_PATHS:
        print(image_path)
        frame = cv2.imread(str(image_path))
        x = str(image_path)
        print(x[25])
        # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        final.append((detect_frame(frame), x[25]))
    '''
    # New: gr.Paint hands over an EditorValue dict; prefer the composite layer.
    if "composite" in word and word["composite"] is not None:
        word = word["composite"]
    elif "background" in word and word["background"] is not None:
        word = word["background"]
    else:
        raise ValueError("No valid image found in EditorValue dict (both 'composite' and 'background' are None)")
    # Denoise, reduce to one channel, binarize, then back to 3-channel BGR for the detector.
    frame = cv2.fastNlMeansDenoising(word, word, 50.0, 7, 21)
    xs = frame.shape
    if len(xs) == 3:
        frame = frame[:, :, 0]
    frame = cv2.adaptiveThreshold(frame, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
    image_np = np.array(frame)
    cpimg = frame.copy()
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    image_t, shapes = detection_model.preprocess(input_tensor)
    prediction_dict = detection_model.predict(image_t, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections
    row, col, dummy = image_np.shape
    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)
    mark = [0] * 15  # keep/discard flag for each of the top detections
    myletters = []
    # Pass 1: same greedy IoU suppression as in detect_frame above, prints silenced.
    for i in range(0, min(15, num_detections)):  # min() guards against <15 detections
        curi = detections['detection_classes'][i]
        classi = classes[curi]
        cur = detections['detection_scores'][i]
        if cur < 0.2:  # confidence threshold
            continue
        x0 = detections['detection_boxes'][i][0]
        y0 = detections['detection_boxes'][i][1]
        x1 = detections['detection_boxes'][i][2]
        y1 = detections['detection_boxes'][i][3]
        curarea = (x1 - x0) * (y1 - y0)
        ok = 1
        for j in range(0, i):
            if mark[j] == 0:
                continue
            curj = detections['detection_classes'][j]
            classj = classes[curj]
            if classi == 'ি' or classj == 'ি':
                if classi != classj:
                    continue
            if classi == 'ী' or classj == 'ী':
                if classi != classj:
                    continue
            x2 = detections['detection_boxes'][j][0]
            y2 = detections['detection_boxes'][j][1]
            x3 = detections['detection_boxes'][j][2]
            y3 = detections['detection_boxes'][j][3]
            # intersection rectangle
            x4 = max(x0, x2)
            y4 = max(y0, y2)
            x5 = min(x1, x3)
            y5 = min(y1, y3)
            if x4 > x5 or y4 > y5:
                continue  # no overlap
            prevarea = (x3 - x2) * (y3 - y2)
            commonarea = (x5 - x4) * (y5 - y4)
            ins = commonarea / (curarea + prevarea - commonarea)  # IoU
            if ins >= 0.5:
                ok = 0
                break
        if ok == 1:
            mark[i] = 1
    # Pass 2 (enabled via the global avver): re-check surviving base characters
    # with the second-stage CNN verifier and adopt its label when more confident.
    for i in range(0, min(15, num_detections)):
        if mark[i] == 0 or avver == 0:
            continue
        if detections['detection_classes'][i] > 38:  # skip digits and dependent signs
            continue
        x0 = int(detections['detection_boxes'][i][0] * row)
        y0 = int(detections['detection_boxes'][i][1] * col)
        x1 = int(detections['detection_boxes'][i][2] * row)
        y1 = int(detections['detection_boxes'][i][3] * col)
        currImg = cpimg[x0:x1, y0:y1]
        curscore = detections['detection_scores'][i]
        curclass = detections['detection_classes'][i]
        label, conf = verify(currImg)
        if conf > curscore and ulta[label] != curclass and ulta[label] != -1:
            detections['detection_classes'][i] = ulta[label]
            detections['detection_scores'][i] = conf
    # Pass 3: collect surviving letters as (xmin, char, xmax) and sort left to right.
    for i in range(0, min(15, num_detections)):
        if detections['detection_scores'][i] < 0.2:
            continue
        if mark[i] == 0:
            continue
        cur = detections['detection_classes'][i]
        cur = classes[cur]
        y0 = detections['detection_boxes'][i][1]
        y1 = detections['detection_boxes'][i][3]
        myletters.append((y0, cur, y1))
    myletters.sort(key=lambda x: x[0])
    # Reattach ু and ্র to the letter they overlap most (see detect_frame above).
    for i in range(len(myletters) - 1, -1, -1):
        y0 = myletters[i][0]
        curr = myletters[i][1]
        y1 = myletters[i][2]
        if curr == 'ু' or curr == '্র':
            mxarea = 0
            mxno = i - 1
            for j in range(0, len(myletters)):
                if i == j:
                    continue
                y2 = myletters[j][0]
                y3 = myletters[j][2]
                curcommon = min(y3, y1) - max(y0, y2)
                if curcommon > mxarea:
                    mxarea = curcommon
                    mxno = j
            if mxno != (i - 1) and i + 1 < len(myletters):  # bounds guard added
                myletters[i], myletters[i + 1] = myletters[i + 1], myletters[i]
    res_list = [x[1] for x in myletters]
    # ে and ি are written before their consonant but follow it in logical order.
    for i in range(len(res_list) - 2, -1, -1):
        x = res_list[i]
        if x == 'ে' or x == 'ি':
            res_list[i], res_list[i + 1] = res_list[i + 1], res_list[i]
    # অ followed by া merges into the single letter আ.
    for i in range(len(res_list) - 2, -1, -1):
        x = res_list[i]
        y = res_list[i + 1]
        if x == 'অ' and y == 'া':
            res_list[i] = 'আ'
            res_list.pop(i + 1)
    output = ''
    for i in res_list:
        output = output + i
    # time_now = datetime.datetime.now().strftime('%m_%d_%Y_%I_%M_%S_%p')
    # date = datetime.date.today().strftime('%Y_%m_%d')
    # folderName = "created/" + date
    # if not os.path.isdir(folderName):
    #     os.makedirs(folderName)
    # fileName = folderName + "/" + time_now + ".png"
    # cv2.imwrite(fileName, word)
    # Draw the kept boxes and their labels on a translucent overlay.
    pil_image = Image.fromarray(word)
    pil_image = pil_image.convert("RGBA")  # New: alpha channel needed for compositing
    for i in range(0, min(15, num_detections)):
        if mark[i] == 0:
            continue
        x0 = detections['detection_boxes'][i][0] * row
        y0 = detections['detection_boxes'][i][1] * col
        x1 = detections['detection_boxes'][i][2] * row
        y1 = detections['detection_boxes'][i][3] * col
        pt1 = (y0, x0)
        overlay = Image.new("RGBA", pil_image.size, (0, 0, 0, 0))  # New
        draw = ImageDraw.Draw(overlay)  # New
        curi = detections['detection_classes'][i]
        classi = classes[curi]
        shape = [(y0, x0), (y1, x1)]
        draw.rectangle(shape, fill=(0, 100, 200, 127))
        draw.rectangle(shape, outline=(0, 0, 0, 127), width=3)
        bbox = draw.textbbox(pt1, classi, font=font)
        draw.rectangle(bbox, fill=(200, 100, 0, 200))
        draw.text(pt1, classi, font=font, fill=(0, 0, 0, 255))
        # merge overlay into the image
        pil_image = Image.alpha_composite(pil_image, overlay)  # New
    newWordImg = np.asarray(pil_image)
    return output, newWordImg
'''
output = ''
for i in range(0, len(final)):
    ara = final[i][0]
    numb = final[i][1]
    if i > 0 and numb != final[i-1][1]:
        output = output + '\n'
    word = ''.join(ara)
    # corrected_word = get_campaign(word)
    output = output + word
    # print(corrected_word, end='')
    output = output + ' '
return output
'''
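
# A minimal local test sketch for model_predict, commented out; it assumes a
# white 800x1200 uint8 canvas, mirroring what gr.Paint(type="numpy") hands over:
# canvas = {"composite": np.full((800, 1200), 255, dtype=np.uint8), "background": None}
# text, annotated = model_predict(canvas)
# print(text)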

import gradio as gr
# HF_TOKEN = os.getenv("SECRET_TOKEN")
# hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "word-flag-data")
demo = gr.Interface(
    fn=model_predict,
    # inputs="paint",  # New Added
    inputs=gr.Paint(
        type="numpy",
        brush=gr.Brush(default_size=12, colors=["#000000"], color_mode="fixed"),
        canvas_size=(1200, 800),
        layers=False,
    ),
    outputs=["text", "image"],
    deep_link=False,
    # title="Bangla Word OCR",
    # description="Reduce pen ink size from Pen Icon(🖋️) for better results!",
    # examples=[
    #     ["Tensorflow/workspace/images/tmpbvc06xxf.png"],
    #     ["Tensorflow/workspace/images/tmpfhin6fzg.png"],
    #     ["Tensorflow/workspace/images/tmprhqli3yl.png"],
    # ],
    article=citation_text,
    # allow_flagging="auto",
    # flagging_callback=hf_writer,
)
demo.launch()
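# Note: demo.launch(share=True) would additionally print a temporary public link;
# the default above is enough on Hugging Face Spaces, which serves the app itself.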