用 Python 批量修改 PDF 文件名

写了个小脚本能根据PDF内容自动用AI重命名。

例如“附件（1）.pdf”能根据内容变成“2023年用户访谈总结.pdf”

带 OCR 功能，PDF 不是文本也可以识别并改名。

效果如图：

白嫖党必不可能用 openai，当然是选择 Gemini 啦（要我给 openai 二次付费，不可能😤）

翻了一下 Gemini API 文档，在 ChatGPT 老师的帮助下 20 分钟搞定。让我们感谢 ChatGPT 老师，让我等根本不懂代码的同学实现 Python 自由！

{% box color %}
2024-04-01 更新防风控，不再通过 google-generativeai 库来访问了，尝试通过自己设置地址访问（用了CF Worker 的默认地址）。参考链接： {% link https://zhile.io/2023/12/24/gemini-pro-proxy.html %} {% endbox %}

{% box 实现思路 & To Do color %}
实现思路灰常的简单：读取 PDF 文件，传给 Gemini，生成文件标题并修改

未来可能各种文件类型都可以安排上，以及 prompt 还可以继续优化一下。

{% endbox %}

代码放在 GitHub：

{% ghcard infinitesum/PDFs-Smart-Rename %}

访问不了 GitHub 可以继续往下看：

使用非常的简单，在代码里填一下文件路径和 Gemini API key 直接运行就可以了。

{% box 获取 Gemini API Key color %}

打开 https://makersuite.google.com/
用 Google 账号登录
点击 Get API Key -> Create API key in new project 保存好 key

注意事项

目前该服务不支持香港IP，查看支持的区域
由于 IP （如多人共享 IP）或者频率的关系，可能会被谷歌云判定为滥用，导致 API Key 或者谷歌云账户被禁用，请谨慎使用，或者使用小号。

{% endbox %}

想要保密性好可以搞环境变量啥的，但我反正是自己用嘛，就懒得整了（不是

{% box 注意事项 color %}
1、操作不可撤销！！！请不要像我一样把整个路径的 PDF 都丢进去。。（在开始的 30s 我还在为一个个转换成功的提示而沾沾自喜，下一秒就意识到了事情的不对🌚此操作还是在我加 OCR 功能之前，泪目……）如果实在是手抖不小心点到，请务必按 ctrl+c before all hell breaks loose 🥹
2、……
{% endbox %}

分为批量重命名版和单文件版：

批量重命名版#

import os
import fitz  # PyMuPDF
import pytesseract
import requests  # 引入requests
from PIL import Image

# 配置Tesseract的路径，如果需要的话
# pytesseract.pytesseract.tesseract_cmd = r'<full_path_to_your_tesseract_executable>'


def extract_text_from_first_page(pdf_path):
    """Return the text of the first page of a PDF, falling back to OCR.

    The embedded text layer is read first. If it holds fewer than 50
    characters after trimming whitespace, the page is rendered to an image
    and passed through Tesseract instead.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted (or OCR'd) text, or "" when the document has no pages.
    """
    min_text_chars = 50  # below this the text layer is treated as unusable
    ocr_dpi = 300  # PyMuPDF renders at 72 dpi unless scaled; too coarse for OCR

    with fitz.open(pdf_path) as doc:
        if len(doc) == 0:
            return ""

        page = doc[0]
        text = page.get_text()

        # Trim first so a text layer made only of whitespace still triggers OCR.
        if len(text.strip()) < min_text_chars:
            zoom = ocr_dpi / 72
            # alpha=False keeps the samples at 3 bytes per pixel, which is the
            # layout Image.frombytes("RGB", ...) expects.
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            ocr_text = pytesseract.image_to_string(img)
            # Keep the short embedded text when OCR recognised nothing at all.
            if ocr_text.strip():
                text = ocr_text

        return text

def generate_title_with_gemini(
    pdf_text,
    api_key,
    *,
    api_base="https://generativelanguage.googleapis.com",
    model="gemini-pro",
    timeout=60,
):
    """Ask the Gemini API to suggest a title for the given document text.

    Args:
        pdf_text: Text extracted from the PDF.
        api_key: Gemini API key, sent as the ``key`` query parameter.
        api_base: Scheme and host of the API endpoint. Point this at your own
            proxy domain if you do not call Google directly.
        model: Name of the model used in the request path.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The suggested title with surrounding whitespace removed, or "" when
        the input text is blank, the request fails, the status code is not
        200, or the response contains no usable candidate.
    """
    # With no document text the model could only invent a title.
    if not pdf_text or not pdf_text.strip():
        return ""

    prompt = "Suggest a title for the following document content in its original language, if it's a research or science paper, just extract the relevant information and name it like: author&author-publishyear-originaltitle. show el.al for multiple authors:"
    payload = {
        "contents": [{"parts": [{"text": prompt + "\n" + pdf_text}]}]
    }
    url = f"{api_base.rstrip('/')}/v1beta/models/{model}:generateContent"

    try:
        response = requests.post(
            url,
            params={"key": api_key},
            json=payload,
            headers={'Content-Type': 'application/json'},
            timeout=timeout,  # without this a stalled connection hangs forever
        )
    except requests.RequestException as e:
        # Network failures follow the same "return empty title" contract as
        # HTTP errors instead of crashing the caller.
        print(f"请求失败：{e}")
        return ""

    print("原始响应:", response.text)  # debug output

    if response.status_code != 200:
        print(f"请求失败，状态码：{response.status_code}")
        return ""

    try:
        candidates = response.json().get('candidates') or []
        if not candidates:
            return ""
        # The whole reply text is returned as-is; callers that need a clean
        # file name must post-process it.
        return candidates[0]['content']['parts'][0]['text'].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        # Invalid JSON, or a candidate without the expected content/parts
        # structure.
        print(f"处理API响应时发生错误：{e}")
        return ""


def _safe_filename_stem(title, max_bytes=200):
    """Reduce free-form model output to a string usable as a file name stem."""
    # Replies may span several lines; only the first non-empty one is used.
    first_line = next(
        (line.strip() for line in title.splitlines() if line.strip()), ""
    )
    # Remove markdown emphasis/heading markers and quotes wrapped around it.
    first_line = first_line.strip("*#`\"' ")
    # Drop control characters; replace path separators and characters that
    # common file systems reject.
    table = {code: None for code in range(32)}
    table.update({ord(ch): "_" for ch in '\\/:*?"<>|'})
    cleaned = first_line.translate(table).strip(" .")
    # Cap the UTF-8 size so that the name plus ".pdf" stays below the usual
    # 255-byte file name limit; a character cut in half is discarded.
    clipped = cleaned.encode("utf-8")[:max_bytes]
    return clipped.decode("utf-8", errors="ignore").strip(" .")


def rename_pdf(pdf_path, api_key):
    """Rename a PDF after a title generated from its first page.

    The text is extracted, a title is requested from Gemini, the title is
    cleaned into a valid file name and the file is renamed inside its current
    directory. Nothing is renamed when no usable title was produced or when
    a file with the target name already exists.

    Args:
        pdf_path: Path of the PDF file to rename.
        api_key: Gemini API key passed through to the title generator.
    """
    pdf_text = extract_text_from_first_page(pdf_path)
    title = generate_title_with_gemini(pdf_text, api_key)
    stem = _safe_filename_stem(title or "")

    # An empty title would otherwise turn the file into a hidden ".pdf".
    if not stem:
        print("未能生成有效标题，未执行重命名。")
        return

    new_path = os.path.join(os.path.dirname(pdf_path), stem + ".pdf")

    if not os.path.exists(new_path):  # never overwrite an existing file
        os.rename(pdf_path, new_path)
        print(f"文件已重命名为: {new_path}")
    else:
        print("已存在同名文件，未执行重命名。")

def rename_pdfs_in_directory(directory_path, api_key):
    """Attempt to rename every PDF located directly in ``directory_path``.

    Entries whose name ends with ``.pdf`` (case-insensitive) are handed to
    ``rename_pdf`` one by one. A failure on one file is reported and does not
    stop the remaining files from being processed.

    Args:
        directory_path: Directory whose entries are scanned (not recursive).
        api_key: Gemini API key forwarded to ``rename_pdf``.
    """
    for entry in os.listdir(directory_path):
        # Guard clause: anything that is not a PDF by extension is ignored.
        if not entry.lower().endswith('.pdf'):
            continue

        pdf_path = os.path.join(directory_path, entry)
        print(f"处理文件：{pdf_path}")
        try:
            rename_pdf(pdf_path, api_key)
        except Exception as err:
            # Best effort: report the problem and move on to the next file.
            print(f"处理文件 {pdf_path} 时发生错误：{err}")

# Batch-rename the PDF files in a directory.
# Replace the placeholder values below before running.

directory_path = "/Users/summer/Downloads/1"  # replace with your PDF directory
your_api_key = "YOUR_API_KEY_HERE"  # replace with your API key

# Renaming cannot be undone, so it only runs when this file is executed as a
# script, never as a side effect of importing it.
if __name__ == "__main__":
    rename_pdfs_in_directory(directory_path, your_api_key)

单文件版#

import os
import fitz  # PyMuPDF
import pytesseract
import requests  # 引入requests
from PIL import Image

def extract_text_from_first_page(pdf_path):
    """Return the text of the first page of a PDF, falling back to OCR.

    The embedded text layer is read first. If it holds fewer than 50
    characters after trimming whitespace, the page is rendered to an image
    and passed through Tesseract instead.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted (or OCR'd) text, or "" when the document has no pages.
    """
    min_text_chars = 50  # below this the text layer is treated as unusable
    ocr_dpi = 300  # PyMuPDF renders at 72 dpi unless scaled; too coarse for OCR

    with fitz.open(pdf_path) as doc:
        if len(doc) == 0:
            return ""

        # Index 0 is the first page; index 1 would raise IndexError on
        # single-page documents.
        page = doc[0]
        text = page.get_text()

        # Trim first so a text layer made only of whitespace still triggers OCR.
        if len(text.strip()) < min_text_chars:
            zoom = ocr_dpi / 72
            # alpha=False keeps the samples at 3 bytes per pixel, which is the
            # layout Image.frombytes("RGB", ...) expects.
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            ocr_text = pytesseract.image_to_string(img)
            # Keep the short embedded text when OCR recognised nothing at all.
            if ocr_text.strip():
                text = ocr_text

        return text

def generate_title_with_gemini(
    pdf_text,
    api_key,
    *,
    api_base="https://generativelanguage.googleapis.com",
    model="gemini-pro",
    timeout=60,
):
    """Ask the Gemini API to suggest a title for the given document text.

    Args:
        pdf_text: Text extracted from the PDF.
        api_key: Gemini API key, sent as the ``key`` query parameter.
        api_base: Scheme and host of the API endpoint. Point this at your own
            proxy domain if you do not call Google directly.
        model: Name of the model used in the request path.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The suggested title with surrounding whitespace removed, or "" when
        the input text is blank, the request fails, the status code is not
        200, or the response contains no usable candidate.
    """
    # With no document text the model could only invent a title.
    if not pdf_text or not pdf_text.strip():
        return ""

    prompt = "Suggest a title for the following document content in its original language, if it's a research or science paper, just extract the relevant information and name it like: author&author-publishyear-originaltitle. show el.al for multiple authors:"
    payload = {
        "contents": [{"parts": [{"text": prompt + "\n" + pdf_text}]}]
    }
    url = f"{api_base.rstrip('/')}/v1beta/models/{model}:generateContent"

    try:
        response = requests.post(
            url,
            params={"key": api_key},
            json=payload,
            headers={'Content-Type': 'application/json'},
            timeout=timeout,  # without this a stalled connection hangs forever
        )
    except requests.RequestException as e:
        # Network failures follow the same "return empty title" contract as
        # HTTP errors instead of crashing the script.
        print(f"请求失败：{e}")
        return ""

    print("原始响应:", response.text)  # debug output

    if response.status_code != 200:
        print(f"请求失败，状态码：{response.status_code}")
        return ""

    try:
        candidates = response.json().get('candidates') or []
        if not candidates:
            return ""
        # The whole reply text is returned as-is; callers that need a clean
        # file name must post-process it.
        return candidates[0]['content']['parts'][0]['text'].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        # Invalid JSON, or a candidate without the expected content/parts
        # structure.
        print(f"处理API响应时发生错误：{e}")
        return ""


def _safe_filename_stem(title, max_bytes=200):
    """Reduce free-form model output to a string usable as a file name stem."""
    # Replies may span several lines; only the first non-empty one is used.
    first_line = next(
        (line.strip() for line in title.splitlines() if line.strip()), ""
    )
    # Remove markdown emphasis/heading markers and quotes wrapped around it.
    first_line = first_line.strip("*#`\"' ")
    # Drop control characters; replace path separators and characters that
    # common file systems reject.
    table = {code: None for code in range(32)}
    table.update({ord(ch): "_" for ch in '\\/:*?"<>|'})
    cleaned = first_line.translate(table).strip(" .")
    # Cap the UTF-8 size so that the name plus ".pdf" stays below the usual
    # 255-byte file name limit; a character cut in half is discarded.
    clipped = cleaned.encode("utf-8")[:max_bytes]
    return clipped.decode("utf-8", errors="ignore").strip(" .")


def rename_pdf(pdf_path, api_key):
    """Rename a PDF after a title generated from its first page.

    The text is extracted, a title is requested from Gemini, the title is
    cleaned into a valid file name and the file is renamed inside its current
    directory. Nothing is renamed when no usable title was produced or when
    a file with the target name already exists.

    Args:
        pdf_path: Path of the PDF file to rename.
        api_key: Gemini API key passed through to the title generator.
    """
    pdf_text = extract_text_from_first_page(pdf_path)
    title = generate_title_with_gemini(pdf_text, api_key)
    stem = _safe_filename_stem(title or "")

    # An empty title would otherwise turn the file into a hidden ".pdf".
    if not stem:
        print("未能生成有效标题，未执行重命名。")
        return

    new_path = os.path.join(os.path.dirname(pdf_path), stem + ".pdf")

    if not os.path.exists(new_path):  # never overwrite an existing file
        os.rename(pdf_path, new_path)
        print(f"文件已重命名为: {new_path}")
    else:
        print("已存在同名文件，未执行重命名。")

# Path of the single PDF file to rename.
pdf_file_path = "/Users/summer/Downloads/1.pdf"  # replace with your PDF file path
your_api_key = "YOUR_API_KEY_HERE"  # replace with your API key

# Renaming cannot be undone, so it only runs when this file is executed as a
# script, never as a side effect of importing it.
if __name__ == "__main__":
    rename_pdf(pdf_file_path, your_api_key)

> cd ..