# Call the Dify chat-messages API to classify a document's discipline
def call_dify_classify(system_prompt, file_content, timeout=120):
    """Ask a Dify chat app which discipline an industry-standard document belongs to.

    Sends a single blocking ``chat-messages`` request. ``system_prompt`` and
    ``file_content`` are passed as app input variables; the ``query`` itself
    is the fixed question "什么专业".

    Args:
        system_prompt: Value sent as the ``system_prompt`` input variable.
        file_content: Value sent as the ``file_content`` input variable.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on a stalled connection.

    Returns:
        The decoded JSON response body, or ``None`` when the request fails,
        the server answers with an error status, or the body is not valid
        JSON.
    """
    # Placeholder endpoint and key: substitute the real deployment values.
    # NOTE(review): a real API key should come from configuration or the
    # environment rather than being committed in source.
    DIFY_API_URL = "http://IP:端口/v1/chat-messages"
    DIFY_API_KEY = "app-388888888888888888888888888XszX"

    headers = {
        "Authorization": f"Bearer {DIFY_API_KEY}",
        "Content-Type": "application/json",
        "Accept": "application/json",
    }

    payload = {
        "inputs": {
            "system_prompt": system_prompt,
            "file_content": file_content,
        },
        "query": "什么专业",  # fixed question; the document text travels in "inputs"
        "response_mode": "blocking",
        "conversation_id": "",
        "user": "txt-file-processor",
    }

    try:
        print(f"正在请求Dify API: {DIFY_API_URL}")
        response = requests.post(
            DIFY_API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()  # turn 4xx/5xx answers into exceptions
        print(f"API响应状态码: {response.status_code}")
    except requests.exceptions.RequestException as e:
        print(f"API请求失败: {e}")
        return None

    # Let the JSON parser handle \uXXXX escapes and the body encoding.
    # Rewriting the raw text first (unicode_escape decoding, stripping control
    # characters or backslashes) corrupts non-ASCII text and breaks otherwise
    # valid JSON.
    try:
        result = response.json()
    except ValueError as e:
        # Every JSON decode error raised by requests subclasses ValueError.
        print(f"API返回数据解析失败: {e}")
        return None

    # ensure_ascii=False keeps Chinese text readable in the debug output.
    print(f"API返回数据: {json.dumps(result, ensure_ascii=False)}")
    return result
    
    
# Post-process the JSON data returned by Dify
def extract_and_clean_answer(dify_response):
    """Return the ``answer`` field of a Dify response with Markdown bold markers removed.

    Every ``**`` sequence is deleted and surrounding whitespace is stripped.
    A missing ``answer`` key yields ``""``. Any error while reading or
    cleaning the value (for example a ``None`` response or a non-string
    answer) is printed and also yields ``""``.
    """
    try:
        raw_answer = dify_response.get("answer", "")
        # Drop the bold markers first, then trim leading/trailing whitespace.
        return raw_answer.replace("**", "").strip()
    except Exception as err:
        print(f"处理LLM返回的answer部分时出错: {err}")
    return ""
if __name__ == "__main__":
    # Extract the text of the scanned PDF into a variable.
    # NOTE(review): file_path is not defined in this block; it has to be
    # provided elsewhere in the module — confirm.
    pdf_content = extract_scanned_pdf_to_variable(pdf_path=file_path)

    # Ask the LLM which discipline category the document belongs to.
    dify_response = call_dify_classify("什么专业", pdf_content)
    if not dify_response:
        print("Dify API调用失败!")
        # There is no enclosing loop to `continue`, so stop the script with a
        # failure status instead.
        raise SystemExit(1)

    # Clean the answer, wrap it to 45 columns and show it.
    results = extract_and_clean_answer(dify_response)
    wrapped_text = textwrap.fill(results, width=45)
    print(wrapped_text)

    
# Logo
#
# 腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。
#
# 更多推荐