jinwei12 committed
Commit 3dffa84
1 Parent(s): acdb896

Update app.py

Files changed (1): app.py (+54 -23)
app.py CHANGED
@@ -16,6 +16,7 @@ from haversine import haversine, Unit
 dataset=None
 
 
+
 def generate_human_readable(tokens,labels):
     ret = []
     for t,lab in zip(tokens,labels):
@@ -49,12 +50,8 @@ def getSlice(tensor):
 
 def getIndex(input):
 
-    # Model name from Hugging Face model hub
-    model_name = "zekun-li/geolm-base-toponym-recognition"
+    tokenizer, model = getModel1()
 
-    # Load tokenizer and model
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForTokenClassification.from_pretrained(model_name)
 
     # Tokenize input sentence
     tokens = tokenizer.encode(input, return_tensors="pt")
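Note on this hunk: getIndex() previously loaded the tokenizer and token-classification model on every call; it now delegates to the shared getModel1() helper introduced at the bottom of this commit, which is decorated with @st.cache_data so the checkpoint is fetched once per session. One hedged caveat: st.cache_data pickles and returns a copy of its value on every call, so for torch models Streamlit's docs recommend st.cache_resource, which hands back the same object. A minimal sketch of that variant (the helper name load_toponym_model is illustrative, not from the commit):

    import streamlit as st
    from transformers import AutoTokenizer, AutoModelForTokenClassification

    @st.cache_resource  # returns the cached objects themselves, no pickling
    def load_toponym_model():
        model_name = "zekun-li/geolm-base-toponym-recognition"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForTokenClassification.from_pretrained(model_name)
        return tokenizer, model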
@@ -126,11 +123,7 @@ def cutSlices(tensor, slicesList):
 def MLearningFormInput(input):
 
 
-    model_name = "zekun-li/geolm-base-cased"
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-    model = GeoLMModel.from_pretrained(model_name)
+    tokenizer, model = getModel2()
 
     tokens = tokenizer.encode(input, return_tensors="pt")
 
@@ -181,11 +174,8 @@ def generate_human_readable(tokens,labels):
 
 def getLocationName(input_sentence):
     # Model name from Hugging Face model hub
-    model_name = "zekun-li/geolm-base-toponym-recognition"
+    tokenizer, model = getModel1()
 
-    # Load tokenizer and model
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForTokenClassification.from_pretrained(model_name)
 
     # Tokenize input sentence
     tokens = tokenizer.encode(input_sentence, return_tensors="pt")
@@ -278,12 +268,14 @@ def search_geonames(toponym, df):
 
 def get50Neigbors(locationID, dataset, k=50):
 
+    print("neighbor part----------------------------------------------------------------")
+
     input_row = dataset.loc[dataset['GeonameID'] == locationID].iloc[0]
 
 
     lat, lon, geohash,name = input_row['Latitude'], input_row['Longitude'], input_row['Geohash'], input_row['Name']
 
-    filtered_dataset = dataset.loc[dataset['Geohash'].str.startswith(geohash[:5])].copy()
+    filtered_dataset = dataset.loc[dataset['Geohash'].str.startswith(geohash[:7])].copy()
 
     filtered_dataset['distance'] = filtered_dataset.apply(
         lambda row: haversine((lat, lon), (row['Latitude'], row['Longitude']), Unit.KILOMETERS),
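Note on the geohash change above: tightening the shared prefix from geohash[:5] to geohash[:7] shrinks the candidate cell area by a factor of 32 per added character, from roughly 4.9 km x 4.9 km at five characters to roughly 153 m x 153 m at seven, so far fewer rows reach the exact haversine pass; the trade-off is that sparse regions may now hold fewer than k candidates. A small sketch of the idea (assumes the pygeohash package, which the app itself does not use; geohash.csv ships precomputed values):

    import pygeohash as pgh

    gh = pgh.encode(34.0522, -118.2437, precision=9)  # downtown Los Angeles
    coarse = gh[:5]  # ~4.9 km x 4.9 km cell: large candidate pool
    fine = gh[:7]    # ~153 m x 153 m cell: much smaller pool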
@@ -291,6 +283,10 @@ def get50Neigbors(locationID, dataset, k=50):
     ).copy()
 
 
+    print("neighbor end----------------------------------------------------------------")
+
+
+
     filtered_dataset = filtered_dataset.sort_values(by='distance')
 
 
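The two print() calls added in this and the previous hunk are ad-hoc progress markers around the filter-and-distance step. A hedged stdlib alternative that is easier to silence outside debugging:

    import logging
    import time

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)

    t0 = time.perf_counter()
    # ... geohash prefix filter + haversine distances ...
    log.info("neighbor filtering took %.3fs", time.perf_counter() - t0)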
 
@@ -301,9 +297,8 @@ def get50Neigbors(locationID, dataset, k=50):
     neighbors=nearest_neighbors.values.tolist()
 
 
-    model_name = "zekun-li/geolm-base-toponym-recognition"
+    tokenizer, model = getModel1_0()
 
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
 
     sep_token_id = tokenizer.convert_tokens_to_ids(tokenizer.sep_token)
     cls_token_id = tokenizer.convert_tokens_to_ids(tokenizer.cls_token)
@@ -328,8 +323,6 @@ def get50Neigbors(locationID, dataset, k=50):
 
     #--------------------------------------------
 
-    model = GeoLMModel.from_pretrained(model_name)
-
 
     tokens = torch.Tensor(neighbor_token_list).unsqueeze(0).long()
 
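For context on the two hunks above: get50Neigbors() now takes its tokenizer and bare GeoLM encoder from getModel1_0() and builds one [CLS]/[SEP]-delimited token list over the neighbor names before the torch.Tensor(...).unsqueeze(0) call. An illustrative sketch of that construction (the place names and loop are invented; the real code also records targetIndex so cutSlices can extract the target span, and may pass extra spatial inputs to the model):

    neighbor_token_list = [cls_token_id]
    for neighbor_name in ["Los Angeles", "Long Beach", "Glendale"]:
        neighbor_token_list += tokenizer.encode(neighbor_name, add_special_tokens=False)
        neighbor_token_list.append(sep_token_id)

    tokens = torch.Tensor(neighbor_token_list).unsqueeze(0).long()
    outputs = model(tokens)  # outputs.last_hidden_state: per-token embeddings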
 
@@ -351,6 +344,9 @@ def get50Neigbors(locationID, dataset, k=50):
     res=cutSlices(outputs.last_hidden_state, [targetIndex])
 
 
+
+
+
     return res
 
 
@@ -374,22 +370,55 @@ def cosine_similarity(target_feature, candidate_feature):
 
 def getCSV():
     dataset = pd.read_csv('geohash.csv')
-
     return dataset
 
+@st.cache_data
+
+def getModel1():
+    # Model name from Hugging Face model hub
+    model_name = "zekun-li/geolm-base-toponym-recognition"
+
+    # Load tokenizer and model
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForTokenClassification.from_pretrained(model_name)
+
+    return tokenizer, model
+
+def getModel1_0():
+    # Model name from Hugging Face model hub
+    model_name = "zekun-li/geolm-base-toponym-recognition"
+
+    # Load tokenizer and model
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = GeoLMModel.from_pretrained(model_name)
+    return tokenizer, model
+
+
+
+def getModel2():
+
+    model_name = "zekun-li/geolm-base-cased"
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+    model = GeoLMModel.from_pretrained(model_name)
+
+    return tokenizer, model
+
+
 def showing(df):
 
     m = folium.Map(location=[df['lat'].mean(), df['lon'].mean()], zoom_start=5)
 
-    size_scale = 100
-    color_scale = 255
-
+    size_scale = 100
+    color_scale = 255
     for i in range(len(df)):
         lat, lon, prob = df.iloc[i]['lat'], df.iloc[i]['lon'], df.iloc[i]['prob']
 
         size = int(prob**2 * size_scale)
         color = int(prob**2 * color_scale)
 
+        # Add the marker to the Folium map
         folium.CircleMarker(
             location=[lat, lon],
             radius=size,
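The three loaders added in this hunk pair two checkpoints with two heads: getModel1() returns zekun-li/geolm-base-toponym-recognition with its token-classification head (for tagging toponyms), getModel1_0() returns the same checkpoint as a bare GeoLMModel encoder (for neighbor embeddings), and getModel2() returns the zekun-li/geolm-base-cased encoder. Call sites unpack them uniformly:

    tokenizer, model = getModel1()      # toponym tagging (getIndex, getLocationName)
    tokenizer, model = getModel1_0()    # bare encoder, same checkpoint (get50Neigbors)
    tokenizer, model = getModel2()      # geolm-base-cased encoder (MLearningFormInput)

Note that only getModel1() is cached in this commit; the other two reload their weights on every call, so they are natural candidates for the same decorator.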
@@ -398,8 +427,10 @@ def showing(df):
             fill_color=f'#{color:02X}0000'
         ).add_to(m)
 
+    # Save the Folium map as an HTML file
     m.save("map.html")
 
+    # Embed the HTML file in Streamlit
     with open("map.html", "r", encoding="utf-8") as f:
         map_html = f.read()
 
 
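On the showing() changes: prob is assumed to lie in [0, 1], and squaring it exaggerates the gap between strong and weak candidates before scaling to a marker radius (prob**2 * 100) and a red channel (prob**2 * 255) that the f-string renders as a #RR0000 hex color. A worked example, with one hedged suggestion for the embedding step the last comment points at (the diff stops at reading map.html back; st.components.v1.html is one common way to display it):

    prob = 0.8
    size = int(prob**2 * 100)    # 64 px radius
    color = int(prob**2 * 255)   # 163
    print(f'#{color:02X}0000')   # '#A30000'

    # Assumption, not shown in the diff:
    # import streamlit.components.v1 as components
    # components.html(map_html, height=500)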