The world's most clever kitty cat

Merge pull request #6 from loco-choco/main

Added learning from pdf (admin only)

authored by bwc9876.dev and committed by GitHub

a985794c 91aadb17

+36 -1
+2 -1
pyproject.toml
··· 13 13 "py-cord>=2.6.1", 14 14 "python-dotenv>=1.0.1", 15 15 "pytesseract>=0.3.13", 16 - "pillow>=11.1.0" 16 + "pillow>=11.1.0", 17 + "pypdf>=5.3.1" 17 18 ] 18 19 19 20 [project.scripts]
+23
src/bingus/cogs/markov.py
··· 4 4 import discord 5 5 import pytesseract 6 6 import PIL 7 + import pypdf 7 8 from discord.ext import commands 8 9 from discord.message import Message 9 10 from pathlib import Path ··· 95 96 await ctx.respond(head, file=discord.File(fd, filename="weights.txt")) 96 97 else: 97 98 await ctx.respond(f"{head}:\n{msg}") 99 + 100 + @require_owner 101 + @commands.slash_command() 102 + async def pdf( 103 + self, ctx: discord.ApplicationContext, file: discord.Option(discord.Attachment) 104 + ): 105 + await ctx.defer(ephemeral=True) 106 + raw = await file.read() 107 + try: 108 + pdf = pypdf.PdfReader(io.BytesIO(raw)) 109 + # i = 0 110 + for page in pdf.pages: 111 + # i += 1 112 + # printf("Bingus learned a page! {i}/{pdf.get_num_pages()}") 113 + text = page.extract_text() 114 + self.markov.learn(text) 115 + await self.update_words() 116 + await ctx.respond( 117 + "> Bingus learned something from the pdf!", ephemeral=True 118 + ) 119 + except pypdf.errors.PdfReadError: 120 + await ctx.respond("> Bingus only understands pdf files!", ephemeral=True) 98 121 99 122 @require_owner 100 123 @commands.slash_command()
+11
uv.lock
··· 90 90 { name = "msgpack" }, 91 91 { name = "pillow" }, 92 92 { name = "py-cord" }, 93 + { name = "pypdf" }, 93 94 { name = "pytesseract" }, 94 95 { name = "python-dotenv" }, 95 96 ] ··· 100 101 { name = "msgpack", specifier = ">=1.1.0" }, 101 102 { name = "pillow", specifier = ">=11.1.0" }, 102 103 { name = "py-cord", specifier = ">=2.6.1" }, 104 + { name = "pypdf", specifier = ">=5.3.1" }, 103 105 { name = "pytesseract", specifier = ">=0.3.13" }, 104 106 { name = "python-dotenv", specifier = ">=1.0.1" }, 105 107 ] ··· 360 362 sdist = { url = "https://files.pythonhosted.org/packages/27/c7/c539d69d5cfa1ea5891d596212f73d619e40c7fc9f02ae906f4147993b94/py_cord-2.6.1.tar.gz", hash = "sha256:36064f225f2c7bbddfe542d5ed581f2a5744f618e039093cf7cd2659a58bc79b", size = 965087 } 361 363 wheels = [ 362 364 { url = "https://files.pythonhosted.org/packages/e7/90/2690ded84e34b15ca2619932a358c1b7dc6d28fe845dfbd01929fc33c9da/py_cord-2.6.1-py3-none-any.whl", hash = "sha256:e3d3b528c5e37b0e0825f5b884cbb9267860976c1e4878e28b55da8fd3af834b", size = 1089154 }, 365 + ] 366 + 367 + [[package]] 368 + name = "pypdf" 369 + version = "5.3.1" 370 + source = { registry = "https://pypi.org/simple" } 371 + sdist = { url = "https://files.pythonhosted.org/packages/da/5b/67df68ec4b934aae9ca89edfb43a869c5edb3bd504dd275be9e83001d3e9/pypdf-5.3.1.tar.gz", hash = "sha256:0b9b715252b3c60bacc052e6a780e8b742cee9b9a2135f6007bb018e22a5adad", size = 5011845 } 372 + wheels = [ 373 + { url = "https://files.pythonhosted.org/packages/f4/0c/75da081f5948e07f373a92087e4808739a3248d308f01c78c9bd4a51defa/pypdf-5.3.1-py3-none-any.whl", hash = "sha256:20ea5b8686faad1b695fda054462b667d5e5f51e25fbbc092f12c5e0bb20d738", size = 302042 }, 363 374 ] 364 375 365 376 [[package]]