1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
| import xlrd from openai import OpenAI import openpyxl import json import time
api_key = "阿巴阿巴歪比巴卜"
def shuju_huoqu(): """ 数据获取函数 功能:读取Excel文件路径、选择表单、提取指定列数据 返回:原始数据列表、起始行号、新文件路径、表单索引、结束行号、原始文件路径 """ old_file_dizhi = input("输入文件地址:").strip('"') index = old_file_dizhi.rfind("\\") new_file_dizhi = old_file_dizhi[:index] + "\\处理后的表格.xlsx"
book = xlrd.open_workbook(old_file_dizhi) print(f" - 该文件包含表单数量为:{book.nsheets}") print(f" - 各个表单的名称分别为:") for i in book.sheet_names(): print(" " + i) print("\n")
biaodan_shu = int(input("选择表单(从上到下依次为1,2...):")) - 1 biaodan = book.sheet_by_index(biaodan_shu) print(f" - 表单名:{biaodan.name} 包含 {biaodan.nrows} 行 {biaodan.ncols} 列 ")
print(" - 该表单表头由左到右依次为:") i = 0 while (i < biaodan.ncols): k = i + 1 print(f" {k}." + str(biaodan.cell_value(rowx=0, colx=i))) i += 1
print(" ") col = int(input("数据位于第几列(从左到右依次为1,2...):")) - 1
count = 0 for row in range(biaodan.nrows): cell_value = str(biaodan.cell_value(row, col)) if cell_value.strip() != "": count += 1
print(f" - 该列数据共 {count} 行,前五项元素为:") i = 0 while (i < 5): k = i + 1 print(f" {k}." + str(biaodan.cell_value(rowx=i, colx=col))) i += 1
print(" ") biaotou = int(input("数据从第几行开始(除表头):")) - 1 print(f" - 数据从第 {biaotou + 1} 行到第 {count} 行")
print("开始获取数据...") shuju = [] for i in range(biaotou, count): value = str(biaodan.cell_value(rowx=i, colx=col)) shuju.append(value)
print(" - 获取的数据:") print(str(shuju)) return shuju, biaotou, new_file_dizhi, biaodan_shu, count - 1, old_file_dizhi
def shuju_chuli(shuju): """ 数据去重函数 参数:原始数据列表 返回:保持顺序的去重后列表 """ c_shuju = [] for j in shuju: if j not in c_shuju: c_shuju.append(j) print("数据处理完成,处理后的数据:") print(c_shuju) return c_shuju
def diaoyong_deepseek(c_shuju): """ DeepSeek API调用函数 参数:去重后的数据列表 返回:字典{原始数据: 省份} """ j_shuju = [] d_shuju = {} total = len(c_shuju)
for i in range(0, total, 250): batch = c_shuju[i:i + 250] print(f"正在发送第{i // 250 + 1}批数据...")
while True: try: client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com") response = client.chat.completions.create( model="deepseek-reasoner", messages=[ { "role": "system", "content": """ 处理规则: 1. 输入列表元素→匹配省份 2. 无法匹配→"***所在地址未知***" 3. 输出纯JSON格式 """ }, {"role": "user", "content": str(batch) + "返回结果应仅包含一个json格式的数据集"}, ], temperature=0, stream=False, max_tokens=8000 ) except Exception as e: print("API调用失败,正在重试...") time.sleep(1) continue else: content = response.choices[0].message.content start = content.find('{') end = content.rfind('}') + 1 if start != -1 and end != 0: result = json.loads(content[start:end]) d_shuju.update(result) print("当前批次处理结果:", result) break finally: client.close() return d_shuju
def shuju_luru(d_shuju, shuju, kaitou_hang, new_file_dizhi, biaodan_shu, jiewei_hang, old_file_dizhi): """ 数据写入函数 功能:将省份信息写入Excel指定列 """ liebiao = [d_shuju.get(item, "***所在地址未知***") for item in shuju]
excel = openpyxl.load_workbook(old_file_dizhi) sheet = excel.worksheets[biaodan_shu]
lie = int(input("选择数据将要写入的列数(从左到右依次为1,2...:"))
for idx, value in enumerate(liebiao): sheet.cell(row=kaitou_hang + idx + 1, column=lie, value=value)
while True: try: excel.save(new_file_dizhi) break except Exception as e: print(f"保存失败:{e}(可能文件被占用,请关闭后重试)") time.sleep(5)
print("保存成功,新文件与原文件位于同一目录下")
if __name__ == "__main__": shuju, kaitou_hang, new_file_dizhi, biaodan_shu, jiewei_hang, old_file_dizhi = shuju_huoqu()
c_shuju = shuju_chuli(shuju)
d_shuju = diaoyong_deepseek(c_shuju)
shuju_luru(d_shuju, shuju, kaitou_hang, new_file_dizhi, biaodan_shu, jiewei_hang, old_file_dizhi)
|