From bc7f2820e95138f80039952458e1fd53eaf63f9f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 16 Jul 2025 08:49:27 +0000 Subject: [PATCH 1/3] Initial plan From 0a9f9b2fa4fe72c24b82d25abba1da988109041b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 16 Jul 2025 08:55:15 +0000 Subject: [PATCH 2/3] Complete implementation of text_ocr.py with image generation and OCR extraction Co-authored-by: ashishkurmi <100655670+ashishkurmi@users.noreply.github.com> --- __pycache__/text_ocr.cpython-312.pyc | Bin 0 -> 5584 bytes requirements.txt | 2 + text_ocr.py | 155 +++++++++++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100644 __pycache__/text_ocr.cpython-312.pyc create mode 100644 requirements.txt create mode 100755 text_ocr.py diff --git a/__pycache__/text_ocr.cpython-312.pyc b/__pycache__/text_ocr.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3984aa34f7b0808e88ca4c3ffbd3095e0d958ef1 GIT binary patch literal 5584 zcmcIoTWk|o8b0G0+vBkv$2cKzNMN`%F_5?y+NOrE3klaQ&;q^S)vkBq87B_TjCW>C z63a%YyDK>o63M=hs#__2O9k4cQl)OCN)-jEEA_=`5|N!oTBPMAZz=Fl(S6wepYb?u z8dP?r9%<&>{_~&f|9$^CU-|tUf$-+w$lzNW2>CZ=?BJ**9{vsz*N8~sM5IJVhKf^W z?1(!crZe;~6K95&hMM6ocUD+{yKX!G1-=9inBv}}i z4shJzQDHcnd2*9&$D~X~;Zv$I%pXnb$8!CAuaZq?6n*P9(5>lRS$Tk4#^pK{p$(_G zG=|5F`cz@0l02fwdeYuA6n4Q!!}m~z(rd&6J5zLsjv_76MMh+bP8}B93PZK+DrKUx z;3&{3M}fJHS|Ai%y3kc>#C(bxn6HVEf@;K9-$b> z!(579vA*bod3sTU=*`1uiv9v!@YUq&U{t7XO(^&ab+9_5;DoUSuQOIPMGCH=O>0CU zTcGTo;k<9yPIR9wqu1G~lw$``cTuoYHS1Py*RIC4_ z5i&}jCnMB(a^Jp>MWJhIl)*8sBKJcZ?$Ev9G|Tz*wUd zk5G0$k&S{Mtr?BvXvIX=Sm+w#1Tzx&k-@Ys@%=*b!hoveWU8&;9f&2>#%AJ-q~}z* zW)%I3l7ar!9Q2jqwOleOX<90m*~1?ZGMdCIFbZ`ftyy}~Jebk)KVT^hr#Vr>*`I-_ z8uXc?hn9Chc|ID=X=+ph+F& zKjtq&84NaBu9hsL-7cd^E~E7=H^49A0I75)qvH8Hmv26-sxbG;yb%P=iO2X4Q5#^` zkw{QQH3lt9BPt$pgF(|9-rYpeb!WExR0N<626QghzU_j3Av1Jcn*&yC4g2% zhDHF9{r$?QK?6n_3}y{R6D~@IGpnX$-JrEG&2VYDD5+|sUd01xu=Zxb>ZL_}&~Ob( z>48Drs9!xViKLQIRD(l}m9*2GkHLs0P~!R|v~*rFr*AOkC@gVHsn={;(M*jR)R;j@ z8rmS9q1y2Hcyv%1mZEA-1`3LfDC&i1QW0SZJK^NWdZ#8Ob81>2dqSz%kwqFtvwiLo z=4ca0dkGX!pNF0i5w(8U#>-@7W2dw6UVW(KTnsjpT#K8wmD%RS#?}(|$VGx1uX!dt zQ#+@3&TO3CQhxr>TzzlJxfE!=dTIR9lr$IEQDRnHtfzIcxutY`Db#v5v~xDJv;53k z<@5b>p=7D=+YQ85e=pQ@t!J`ldgDyP&Ay4AxzH=6zQu6cl<;oT^xhfjgO_K+yGkb( zTQ*Pq{@wQJzM0?$M`v4hkG~2dXla`|_g;AV%)6UWptbbsJ^zM@_^f}+^ubx*&Jwlg z4^3Q{^>3XPW_|4?YQ;r7{;QsG&%|>R!o)LEJEr3|#gb>9-G?=GSGjR+qI;fgT?~Z2 zd-Ng+Hvgrm9N7ARI6bW^4DD}SYU1xUbZ8f0 z&Ao1ND|qllNZ)n``UriysiAj|jDLrVH4^{ccE~#a^qNCtgkJGCh2K8qN z@X|)RHLzGmk*=AEvp}1?^wjeJUSf*$C3+0K9EfXp$(8@g#PY+I3uqCE>3LQC$Eq** zME0?fXk98%Qmf>Hj%-#{kQRP|N`T5@wv$`&SNHG$%%>FDBp1X2%m3w50>H9p>dS{r zsEt*;WgHjZp7*x%vZC`TWKcB(%X%um&6KkpUxax?7t7_W0Di=0I26rbvI1gjT0>@qpmU! zNwnbYSt!(yR1jI!=TM}nB4P?8aBD13RQE&eYbZYnKY+r9ptIqAAb53Te8dE}!TCV@ zk}p_pXrE!H^EWq@H}u@;{qn@E6AQl6i=ie9Ah}6ys(Y?}2jF6O$KCMW+3?<*Z_I`F zmFfU;S~~8AJ7>e4bKz*I?p~m+%(i`Z-`jx5(O@FS^EaQnxxHNX(r0HskAE6pV2>>Y zLMC!FJl4>Udgs8w9&(G?hyNbWp)UFh*XBbJ`iqDY(n%1ghN|5!!kS(qtDd-y>}`f2 zT$eTBdIdW~>cY#grl}e$an!V|TFPqkT05_4#r$8boc}{BJTqok4}HYyje#OHI@g)G z=t38~_)}7DQwp$esETBj?jf=YKm{ysquBNogGOPC|WR%ObJ6QiSk2P2`p4Cxq zSg*8DGo!IUB$PjPQh?h?Dkqz77sv;L@C*Skom7U0A=Q~l1HcRFKyDa(MtsE(0j0rn zgnJO)VbHBiWt5RF75TX0uE+S3Y0Z4Gs5UflKEK;~n|P{siTpwPU1B})r+Sl6@f@jW zX!%VQB07h(k`!blpneJNV_AJHzok-n1SkdHM1CaDcGS8fR_`Cy!w+&)jBkHTwXdo$ z$q(1jcA2dUOFmdpf!9C0y2bc>>){G%+H|+d>|I@_@*Vc5swykYvz8n4fr=u~jq{7; z>pRzkh(|^IH>uKKU{@|$)QPNVFw$rm=pF0Poi~XL-f{dmy##Qkd1*E3EDp7O3w0le zY6v)0W@LupEUCRvBxBv{zy+7dBbKn$su)E&pMDfsL8j zeRsBhx$D-h1@@$wJv$%&&E2>-8y6Q?X^Hh-Wyjfx;1%!vhNf%BCXY?MKKbh9lS_@w z*WR9ddrFx6_2pB*j=u0+@8((W=6Nq~D%t(<_D^lQ$2{dk;Oi-F?PW_O(sP<+jc; z*EP>XEk#_%JQG=J+%xmrxyJpMPu+7jmfdZpmhR8A&-0(cGMxBP@7$(&rgaGva;@`B z8zf3y3rzSStSsC$6A4`FL;{>5cOoGw$wb0(gZ5%I{o3(gnZA&DA2x5oYByA@Zm+pZ zs0NRFBJm+n`>-AzoX70`VMWYkqyy>+$ReR@zl0xLSSadS#!E2|TL|U-hIsx(cF&UC t-;fOtT-{XTgNxKAs^P)@2-ULkHbod;DgQMc{wKr!>BtIAm=?2#{{qD*6&L^j literal 0 HcmV?d00001 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..34f59cb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +Pillow>=10.0.0 +pytesseract>=0.3.0 \ No newline at end of file diff --git a/text_ocr.py b/text_ocr.py new file mode 100755 index 0000000..405da8a --- /dev/null +++ b/text_ocr.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +""" +Text to Image with OCR Extraction + +This script generates an image with user-specified text and then extracts +the text using OCR (Optical Character Recognition). + +Usage: + python text_ocr.py + +Example: + python text_ocr.py "Hello from GitHub Copilot!" test.png +""" + +import sys +import os +from PIL import Image, ImageDraw, ImageFont +import pytesseract + + +def generate_image_with_text(text, filename): + """ + Generate an image with specified text on a white background. + + Args: + text (str): The text to embed in the image + filename (str): The filename for the generated image + + Returns: + bool: True if successful, False otherwise + """ + try: + # Image dimensions and settings + width = 800 + height = 200 + background_color = 'white' + text_color = 'black' + + # Create a new image with white background + image = Image.new('RGB', (width, height), background_color) + draw = ImageDraw.Draw(image) + + # Try to use a larger font if available, fallback to default + try: + font_size = 36 + font = ImageFont.truetype('/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf', font_size) + except (OSError, IOError): + # Fallback to default font if specific font not found + try: + font = ImageFont.load_default() + except Exception: + font = None + + # Calculate text position to center it + if font: + # Get text bounding box + bbox = draw.textbbox((0, 0), text, font=font) + text_width = bbox[2] - bbox[0] + text_height = bbox[3] - bbox[1] + else: + # Rough estimation for default font + text_width = len(text) * 10 + text_height = 20 + + x = (width - text_width) // 2 + y = (height - text_height) // 2 + + # Draw the text on the image + draw.text((x, y), text, fill=text_color, font=font) + + # Save the image + image.save(filename) + return True + + except Exception as e: + print(f"Error generating image: {e}", file=sys.stderr) + return False + + +def extract_text_from_image(filename): + """ + Extract text from an image using OCR. + + Args: + filename (str): The filename of the image to process + + Returns: + str: Extracted text, or None if extraction failed + """ + try: + # Check if file exists + if not os.path.isfile(filename): + print(f"Error: Image file '{filename}' not found", file=sys.stderr) + return None + + # Open the image and extract text using pytesseract + image = Image.open(filename) + extracted_text = pytesseract.image_to_string(image) + + # Clean up the extracted text (remove extra whitespace) + extracted_text = extracted_text.strip() + + return extracted_text + + except Exception as e: + print(f"Error extracting text from image: {e}", file=sys.stderr) + return None + + +def main(): + """Main function to handle command-line arguments and execute the workflow.""" + + # Check for correct number of arguments + if len(sys.argv) != 3: + print("Error: Missing arguments", file=sys.stderr) + print("Usage: python text_ocr.py ", file=sys.stderr) + print("Example: python text_ocr.py \"Hello from GitHub Copilot!\" test.png", file=sys.stderr) + sys.exit(1) + + text_to_embed = sys.argv[1] + image_filename = sys.argv[2] + + # Validate arguments + if not text_to_embed.strip(): + print("Error: Text cannot be empty", file=sys.stderr) + sys.exit(1) + + if not image_filename.strip(): + print("Error: Filename cannot be empty", file=sys.stderr) + sys.exit(1) + + # Generate the image with text + print(f"Generating image with text: '{text_to_embed}'") + if not generate_image_with_text(text_to_embed, image_filename): + print("Error: Failed to generate image", file=sys.stderr) + sys.exit(1) + + print(f"Generated image: {image_filename}") + + # Extract text from the generated image using OCR + print("Extracting text using OCR...") + extracted_text = extract_text_from_image(image_filename) + + if extracted_text is None: + print("Error: Failed to extract text from image", file=sys.stderr) + sys.exit(1) + + # Display the results + print("Extracted Text:") + print("----------------") + print(extracted_text) + + +if __name__ == "__main__": + main() \ No newline at end of file From 001dd60705b860900569677d62da12094266fc5f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 16 Jul 2025 08:55:52 +0000 Subject: [PATCH 3/3] Add .gitignore to exclude __pycache__ and generated images Co-authored-by: ashishkurmi <100655670+ashishkurmi@users.noreply.github.com> --- .gitignore | 74 +++++++++++++++++++++++++++ __pycache__/text_ocr.cpython-312.pyc | Bin 5584 -> 0 bytes 2 files changed, 74 insertions(+) create mode 100644 .gitignore delete mode 100644 __pycache__/text_ocr.cpython-312.pyc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a88513f --- /dev/null +++ b/.gitignore @@ -0,0 +1,74 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Virtual environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Generated images (for testing) +*.png +*.jpg +*.jpeg +*.gif +*.bmp \ No newline at end of file diff --git a/__pycache__/text_ocr.cpython-312.pyc b/__pycache__/text_ocr.cpython-312.pyc deleted file mode 100644 index 3984aa34f7b0808e88ca4c3ffbd3095e0d958ef1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5584 zcmcIoTWk|o8b0G0+vBkv$2cKzNMN`%F_5?y+NOrE3klaQ&;q^S)vkBq87B_TjCW>C z63a%YyDK>o63M=hs#__2O9k4cQl)OCN)-jEEA_=`5|N!oTBPMAZz=Fl(S6wepYb?u z8dP?r9%<&>{_~&f|9$^CU-|tUf$-+w$lzNW2>CZ=?BJ**9{vsz*N8~sM5IJVhKf^W z?1(!crZe;~6K95&hMM6ocUD+{yKX!G1-=9inBv}}i z4shJzQDHcnd2*9&$D~X~;Zv$I%pXnb$8!CAuaZq?6n*P9(5>lRS$Tk4#^pK{p$(_G zG=|5F`cz@0l02fwdeYuA6n4Q!!}m~z(rd&6J5zLsjv_76MMh+bP8}B93PZK+DrKUx z;3&{3M}fJHS|Ai%y3kc>#C(bxn6HVEf@;K9-$b> z!(579vA*bod3sTU=*`1uiv9v!@YUq&U{t7XO(^&ab+9_5;DoUSuQOIPMGCH=O>0CU zTcGTo;k<9yPIR9wqu1G~lw$``cTuoYHS1Py*RIC4_ z5i&}jCnMB(a^Jp>MWJhIl)*8sBKJcZ?$Ev9G|Tz*wUd zk5G0$k&S{Mtr?BvXvIX=Sm+w#1Tzx&k-@Ys@%=*b!hoveWU8&;9f&2>#%AJ-q~}z* zW)%I3l7ar!9Q2jqwOleOX<90m*~1?ZGMdCIFbZ`ftyy}~Jebk)KVT^hr#Vr>*`I-_ z8uXc?hn9Chc|ID=X=+ph+F& zKjtq&84NaBu9hsL-7cd^E~E7=H^49A0I75)qvH8Hmv26-sxbG;yb%P=iO2X4Q5#^` zkw{QQH3lt9BPt$pgF(|9-rYpeb!WExR0N<626QghzU_j3Av1Jcn*&yC4g2% zhDHF9{r$?QK?6n_3}y{R6D~@IGpnX$-JrEG&2VYDD5+|sUd01xu=Zxb>ZL_}&~Ob( z>48Drs9!xViKLQIRD(l}m9*2GkHLs0P~!R|v~*rFr*AOkC@gVHsn={;(M*jR)R;j@ z8rmS9q1y2Hcyv%1mZEA-1`3LfDC&i1QW0SZJK^NWdZ#8Ob81>2dqSz%kwqFtvwiLo z=4ca0dkGX!pNF0i5w(8U#>-@7W2dw6UVW(KTnsjpT#K8wmD%RS#?}(|$VGx1uX!dt zQ#+@3&TO3CQhxr>TzzlJxfE!=dTIR9lr$IEQDRnHtfzIcxutY`Db#v5v~xDJv;53k z<@5b>p=7D=+YQ85e=pQ@t!J`ldgDyP&Ay4AxzH=6zQu6cl<;oT^xhfjgO_K+yGkb( zTQ*Pq{@wQJzM0?$M`v4hkG~2dXla`|_g;AV%)6UWptbbsJ^zM@_^f}+^ubx*&Jwlg z4^3Q{^>3XPW_|4?YQ;r7{;QsG&%|>R!o)LEJEr3|#gb>9-G?=GSGjR+qI;fgT?~Z2 zd-Ng+Hvgrm9N7ARI6bW^4DD}SYU1xUbZ8f0 z&Ao1ND|qllNZ)n``UriysiAj|jDLrVH4^{ccE~#a^qNCtgkJGCh2K8qN z@X|)RHLzGmk*=AEvp}1?^wjeJUSf*$C3+0K9EfXp$(8@g#PY+I3uqCE>3LQC$Eq** zME0?fXk98%Qmf>Hj%-#{kQRP|N`T5@wv$`&SNHG$%%>FDBp1X2%m3w50>H9p>dS{r zsEt*;WgHjZp7*x%vZC`TWKcB(%X%um&6KkpUxax?7t7_W0Di=0I26rbvI1gjT0>@qpmU! zNwnbYSt!(yR1jI!=TM}nB4P?8aBD13RQE&eYbZYnKY+r9ptIqAAb53Te8dE}!TCV@ zk}p_pXrE!H^EWq@H}u@;{qn@E6AQl6i=ie9Ah}6ys(Y?}2jF6O$KCMW+3?<*Z_I`F zmFfU;S~~8AJ7>e4bKz*I?p~m+%(i`Z-`jx5(O@FS^EaQnxxHNX(r0HskAE6pV2>>Y zLMC!FJl4>Udgs8w9&(G?hyNbWp)UFh*XBbJ`iqDY(n%1ghN|5!!kS(qtDd-y>}`f2 zT$eTBdIdW~>cY#grl}e$an!V|TFPqkT05_4#r$8boc}{BJTqok4}HYyje#OHI@g)G z=t38~_)}7DQwp$esETBj?jf=YKm{ysquBNogGOPC|WR%ObJ6QiSk2P2`p4Cxq zSg*8DGo!IUB$PjPQh?h?Dkqz77sv;L@C*Skom7U0A=Q~l1HcRFKyDa(MtsE(0j0rn zgnJO)VbHBiWt5RF75TX0uE+S3Y0Z4Gs5UflKEK;~n|P{siTpwPU1B})r+Sl6@f@jW zX!%VQB07h(k`!blpneJNV_AJHzok-n1SkdHM1CaDcGS8fR_`Cy!w+&)jBkHTwXdo$ z$q(1jcA2dUOFmdpf!9C0y2bc>>){G%+H|+d>|I@_@*Vc5swykYvz8n4fr=u~jq{7; z>pRzkh(|^IH>uKKU{@|$)QPNVFw$rm=pF0Poi~XL-f{dmy##Qkd1*E3EDp7O3w0le zY6v)0W@LupEUCRvBxBv{zy+7dBbKn$su)E&pMDfsL8j zeRsBhx$D-h1@@$wJv$%&&E2>-8y6Q?X^Hh-Wyjfx;1%!vhNf%BCXY?MKKbh9lS_@w z*WR9ddrFx6_2pB*j=u0+@8((W=6Nq~D%t(<_D^lQ$2{dk;Oi-F?PW_O(sP<+jc; z*EP>XEk#_%JQG=J+%xmrxyJpMPu+7jmfdZpmhR8A&-0(cGMxBP@7$(&rgaGva;@`B z8zf3y3rzSStSsC$6A4`FL;{>5cOoGw$wb0(gZ5%I{o3(gnZA&DA2x5oYByA@Zm+pZ zs0NRFBJm+n`>-AzoX70`VMWYkqyy>+$ReR@zl0xLSSadS#!E2|TL|U-hIsx(cF&UC t-;fOtT-{XTgNxKAs^P)@2-ULkHbod;DgQMc{wKr!>BtIAm=?2#{{qD*6&L^j