| 12
 3
 4
 5
 6
 7
 8
 9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 
 | import xlrd
 from openai import OpenAI
 import openpyxl
 import json
 import time
 
 
 
 api_key = "阿巴阿巴歪比巴卜"
 
 
 
 def shuju_huoqu():
 """
 数据获取函数
 功能:读取Excel文件路径、选择表单、提取指定列数据
 返回:原始数据列表、起始行号、新文件路径、表单索引、结束行号、原始文件路径
 """
 
 old_file_dizhi = input("输入文件地址:").strip('"')
 
 index = old_file_dizhi.rfind("\\")
 new_file_dizhi = old_file_dizhi[:index] + "\\处理后的表格.xlsx"
 
 
 book = xlrd.open_workbook(old_file_dizhi)
 print(f" - 该文件包含表单数量为:{book.nsheets}")
 print(f" - 各个表单的名称分别为:")
 
 for i in book.sheet_names():
 print("  " + i)
 print("\n")
 
 
 biaodan_shu = int(input("选择表单(从上到下依次为1,2...):")) - 1
 biaodan = book.sheet_by_index(biaodan_shu)
 print(f" - 表单名:{biaodan.name}    包含 {biaodan.nrows} 行 {biaodan.ncols} 列 ")
 
 
 print(" - 该表单表头由左到右依次为:")
 i = 0
 while (i < biaodan.ncols):
 k = i + 1
 print(f"  {k}." + str(biaodan.cell_value(rowx=0, colx=i)))
 i += 1
 
 
 print(" ")
 col = int(input("数据位于第几列(从左到右依次为1,2...):")) - 1
 
 
 count = 0
 for row in range(biaodan.nrows):
 cell_value = str(biaodan.cell_value(row, col))
 if cell_value.strip() != "":
 count += 1
 
 
 print(f" - 该列数据共 {count} 行,前五项元素为:")
 i = 0
 while (i < 5):
 k = i + 1
 print(f"  {k}." + str(biaodan.cell_value(rowx=i, colx=col)))
 i += 1
 
 
 print(" ")
 biaotou = int(input("数据从第几行开始(除表头):")) - 1
 print(f" - 数据从第 {biaotou + 1} 行到第 {count} 行")
 
 
 print("开始获取数据...")
 shuju = []
 for i in range(biaotou, count):
 value = str(biaodan.cell_value(rowx=i, colx=col))
 shuju.append(value)
 
 print(" - 获取的数据:")
 print(str(shuju))
 return shuju, biaotou, new_file_dizhi, biaodan_shu, count - 1, old_file_dizhi
 
 
 def shuju_chuli(shuju):
 """
 数据去重函数
 参数:原始数据列表
 返回:保持顺序的去重后列表
 """
 c_shuju = []
 for j in shuju:
 if j not in c_shuju:
 c_shuju.append(j)
 print("数据处理完成,处理后的数据:")
 print(c_shuju)
 return c_shuju
 
 
 def diaoyong_deepseek(c_shuju):
 """
 DeepSeek API调用函数
 参数:去重后的数据列表
 返回:字典{原始数据: 省份}
 """
 j_shuju = []
 d_shuju = {}
 total = len(c_shuju)
 
 
 for i in range(0, total, 250):
 batch = c_shuju[i:i + 250]
 print(f"正在发送第{i // 250 + 1}批数据...")
 
 
 while True:
 try:
 client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")
 response = client.chat.completions.create(
 model="deepseek-reasoner",
 messages=[
 {
 "role": "system",
 "content": """
 处理规则:
 1. 输入列表元素→匹配省份
 2. 无法匹配→"***所在地址未知***"
 3. 输出纯JSON格式
 """
 },
 {"role": "user", "content": str(batch) + "返回结果应仅包含一个json格式的数据集"},
 ],
 temperature=0,
 stream=False,
 max_tokens=8000
 )
 except Exception as e:
 print("API调用失败,正在重试...")
 time.sleep(1)
 continue
 else:
 
 content = response.choices[0].message.content
 
 start = content.find('{')
 end = content.rfind('}') + 1
 if start != -1 and end != 0:
 result = json.loads(content[start:end])
 d_shuju.update(result)
 print("当前批次处理结果:", result)
 break
 finally:
 client.close()
 return d_shuju
 
 
 def shuju_luru(d_shuju, shuju, kaitou_hang, new_file_dizhi, biaodan_shu, jiewei_hang, old_file_dizhi):
 """
 数据写入函数
 功能:将省份信息写入Excel指定列
 """
 
 liebiao = [d_shuju.get(item, "***所在地址未知***") for item in shuju]
 
 
 excel = openpyxl.load_workbook(old_file_dizhi)
 sheet = excel.worksheets[biaodan_shu]
 
 
 lie = int(input("选择数据将要写入的列数(从左到右依次为1,2...:"))
 
 
 for idx, value in enumerate(liebiao):
 sheet.cell(row=kaitou_hang + idx + 1, column=lie, value=value)
 
 
 while True:
 try:
 excel.save(new_file_dizhi)
 break
 except Exception as e:
 print(f"保存失败:{e}(可能文件被占用,请关闭后重试)")
 time.sleep(5)
 
 print("保存成功,新文件与原文件位于同一目录下")
 
 
 
 
 if __name__ == "__main__":
 
 shuju, kaitou_hang, new_file_dizhi, biaodan_shu, jiewei_hang, old_file_dizhi = shuju_huoqu()
 
 
 c_shuju = shuju_chuli(shuju)
 
 
 d_shuju = diaoyong_deepseek(c_shuju)
 
 
 shuju_luru(d_shuju, shuju, kaitou_hang, new_file_dizhi, biaodan_shu, jiewei_hang, old_file_dizhi)
 
 |