由于 IP(如多人共享同一 IP)或请求频率的关系,可能会被谷歌云判定为滥用,导致 API Key 或者谷歌云账户被禁用,请谨慎使用,或者使用小号。
想要保密性好可以搞环境变量啥的,但我反正是自己用嘛,就懒得整了(不是
注意事项
1、操作不可撤销!!!请不要像我一样把整个路径的 PDF 都丢进去……(在开始的 30s 我还在为一个个转换成功的提示而沾沾自喜,下一秒就意识到了事情的不对🌚此操作还是在我加 OCR 功能之前,泪目……)如果实在是手抖不小心点到,请务必按 Ctrl+C before all hell breaks loose 🥹 2、……
def extract_text_from_first_page(pdf_path):
    """Extract the text of the first page of a PDF, falling back to OCR.

    Args:
        pdf_path: Path of the PDF file; passed straight to ``fitz.open``.

    Returns:
        The text of the first page. When the page's text layer yields fewer
        than 50 characters, the page is rendered to an image and the output
        of ``pytesseract.image_to_string`` is returned instead. An empty
        string is returned when the document has no pages.
    """
    with fitz.open(pdf_path) as doc:
        if len(doc) == 0:
            return ""

        page = doc[0]  # first page
        text = page.get_text()

        # Assume a usable text layer has at least 50 characters; anything
        # shorter is treated as a scanned page and sent through OCR.
        if len(text) < 50:
            pix = page.get_pixmap()  # render the page to a pixel map (image)
            # NOTE(review): assumes the pixmap samples are 3-channel RGB
            # without alpha — confirm against the PyMuPDF defaults in use.
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            text = pytesseract.image_to_string(img)

        return text
def generate_title_with_gemini(pdf_text, api_key):
    """Ask the Gemini API to suggest a file title for the given PDF text.

    Args:
        pdf_text: Text extracted from the PDF; appended to the prompt.
        api_key: Gemini API key, sent as the ``key`` query parameter.

    Returns:
        An empty string when the HTTP status code is not 200.
        NOTE(review): the code shown here has no handling for a successful
        response, so as written it returns ``None`` on HTTP 200 — confirm
        whether the response-parsing part was cut off from this excerpt.
    """
    headers = {'Content-Type': 'application/json'}
    prompt = (
        "Suggest a title for the following document content in its original "
        "language, if it's a research or science paper, just extract the "
        "relevant information and name it like: "
        "author&author-publishyear-originaltitle. show el.al for multiple authors:"
    )
    # Combine the instruction prompt with the PDF text.
    full_text = prompt + "\n" + pdf_text
    data = {
        "contents": [{"parts": [{"text": full_text}]}]
    }
    response = requests.post(
        # Replace the host with your own proxy domain if you reach the API
        # through one.
        f'https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key={api_key}',
        json=data,
        headers=headers,
    )
    print("原始响应:", response.text)  # debug output

    if response.status_code != 200:
        print(f"请求失败,状态码:{response.status_code}")
        return ""
import os import fitz # PyMuPDF import pytesseract import requests # 引入requests from PIL import Image
def extract_text_from_first_page(pdf_path):
    """Extract text from the second page of a PDF, falling back to OCR.

    Despite the function name, this variant reads the second page
    (index 1). The original guarded only against an empty document and then
    indexed page 1 unconditionally, which fails on single-page PDFs; a
    single-page document now falls back to its first page.

    Args:
        pdf_path: Path of the PDF file; passed straight to ``fitz.open``.

    Returns:
        The text of the selected page. When the page's text layer yields
        fewer than 50 characters, the page is rendered to an image and the
        output of ``pytesseract.image_to_string`` is returned instead. An
        empty string is returned when the document has no pages.
    """
    with fitz.open(pdf_path) as doc:
        page_count = len(doc)
        if page_count == 0:
            return ""

        # Prefer the second page; use the first when there is only one.
        page_index = 1 if page_count > 1 else 0
        page = doc[page_index]
        text = page.get_text()

        # Assume a usable text layer has at least 50 characters; anything
        # shorter is treated as a scanned page and sent through OCR.
        if len(text) < 50:
            pix = page.get_pixmap()  # render the page to a pixel map (image)
            # NOTE(review): assumes the pixmap samples are 3-channel RGB
            # without alpha — confirm against the PyMuPDF defaults in use.
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            text = pytesseract.image_to_string(img)

        return text


def generate_title_with_gemini(pdf_text, api_key):
    """Ask the Gemini API to suggest a file title for the given PDF text.

    Args:
        pdf_text: Text extracted from the PDF; appended to the prompt.
        api_key: Gemini API key, sent as the ``key`` query parameter.

    Returns:
        An empty string when the HTTP status code is not 200.
        NOTE(review): the visible source stops after the status check, so
        the handling of a successful response is not shown here — confirm
        that it follows this point.
    """
    headers = {'Content-Type': 'application/json'}
    prompt = (
        "Suggest a title for the following document content in its original "
        "language, if it's a research or science paper, just extract the "
        "relevant information and name it like: "
        "author&author-publishyear-originaltitle. show el.al for multiple authors:"
    )
    # Combine the instruction prompt with the PDF text.
    full_text = prompt + "\n" + pdf_text
    data = {
        "contents": [{"parts": [{"text": full_text}]}]
    }
    response = requests.post(
        # Replace the host with your own proxy domain if you reach the API
        # through one.
        f'https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key={api_key}',
        json=data,
        headers=headers,
    )
    print("原始响应:", response.text)  # debug output

    if response.status_code != 200:
        print(f"请求失败,状态码:{response.status_code}")
        return ""