Update app.py
Browse files
app.py
CHANGED
@@ -16,6 +16,7 @@ from haversine import haversine, Unit
|
|
16 |
dataset=None
|
17 |
|
18 |
|
|
|
19 |
def generate_human_readable(tokens,labels):
|
20 |
ret = []
|
21 |
for t,lab in zip(tokens,labels):
|
@@ -49,12 +50,8 @@ def getSlice(tensor):
|
|
49 |
|
50 |
def getIndex(input):
|
51 |
|
52 |
-
# Model name from Hugging Face model hub
|
53 |
-
model_name = "zekun-li/geolm-base-toponym-recognition"
|
54 |
|
55 |
-
|
56 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
57 |
-
model = AutoModelForTokenClassification.from_pretrained(model_name)
|
58 |
|
59 |
# Tokenize input sentence
|
60 |
tokens = tokenizer.encode(input, return_tensors="pt")
|
@@ -126,11 +123,7 @@ def cutSlices(tensor, slicesList):
|
|
126 |
def MLearningFormInput(input):
|
127 |
|
128 |
|
129 |
-
|
130 |
-
|
131 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
132 |
-
|
133 |
-
model = GeoLMModel.from_pretrained(model_name)
|
134 |
|
135 |
tokens = tokenizer.encode(input, return_tensors="pt")
|
136 |
|
@@ -181,11 +174,8 @@ def generate_human_readable(tokens,labels):
|
|
181 |
|
182 |
def getLocationName(input_sentence):
|
183 |
# Model name from Hugging Face model hub
|
184 |
-
|
185 |
|
186 |
-
# Load tokenizer and model
|
187 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
188 |
-
model = AutoModelForTokenClassification.from_pretrained(model_name)
|
189 |
|
190 |
# Tokenize input sentence
|
191 |
tokens = tokenizer.encode(input_sentence, return_tensors="pt")
|
@@ -278,12 +268,14 @@ def search_geonames(toponym, df):
|
|
278 |
|
279 |
def get50Neigbors(locationID, dataset, k=50):
|
280 |
|
|
|
|
|
281 |
input_row = dataset.loc[dataset['GeonameID'] == locationID].iloc[0]
|
282 |
|
283 |
|
284 |
lat, lon, geohash,name = input_row['Latitude'], input_row['Longitude'], input_row['Geohash'], input_row['Name']
|
285 |
|
286 |
-
filtered_dataset = dataset.loc[dataset['Geohash'].str.startswith(geohash[:
|
287 |
|
288 |
filtered_dataset['distance'] = filtered_dataset.apply(
|
289 |
lambda row: haversine((lat, lon), (row['Latitude'], row['Longitude']), Unit.KILOMETERS),
|
@@ -291,6 +283,10 @@ def get50Neigbors(locationID, dataset, k=50):
|
|
291 |
).copy()
|
292 |
|
293 |
|
|
|
|
|
|
|
|
|
294 |
filtered_dataset = filtered_dataset.sort_values(by='distance')
|
295 |
|
296 |
|
@@ -301,9 +297,8 @@ def get50Neigbors(locationID, dataset, k=50):
|
|
301 |
neighbors=nearest_neighbors.values.tolist()
|
302 |
|
303 |
|
304 |
-
|
305 |
|
306 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
307 |
|
308 |
sep_token_id = tokenizer.convert_tokens_to_ids(tokenizer.sep_token)
|
309 |
cls_token_id = tokenizer.convert_tokens_to_ids(tokenizer.cls_token)
|
@@ -328,8 +323,6 @@ def get50Neigbors(locationID, dataset, k=50):
|
|
328 |
|
329 |
#--------------------------------------------
|
330 |
|
331 |
-
model = GeoLMModel.from_pretrained(model_name)
|
332 |
-
|
333 |
|
334 |
tokens = torch.Tensor(neighbor_token_list).unsqueeze(0).long()
|
335 |
|
@@ -351,6 +344,9 @@ def get50Neigbors(locationID, dataset, k=50):
|
|
351 |
res=cutSlices(outputs.last_hidden_state, [targetIndex])
|
352 |
|
353 |
|
|
|
|
|
|
|
354 |
return res
|
355 |
|
356 |
|
@@ -374,22 +370,55 @@ def cosine_similarity(target_feature, candidate_feature):
|
|
374 |
|
375 |
def getCSV():
    """Read ``geohash.csv`` (relative to the working directory) into a DataFrame.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv('geohash.csv')``.
    """
    return pd.read_csv('geohash.csv')
|
379 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
def showing(df):
|
381 |
|
382 |
m = folium.Map(location=[df['lat'].mean(), df['lon'].mean()], zoom_start=5)
|
383 |
|
384 |
-
size_scale = 100
|
385 |
-
color_scale = 255
|
386 |
-
|
387 |
for i in range(len(df)):
|
388 |
lat, lon, prob = df.iloc[i]['lat'], df.iloc[i]['lon'], df.iloc[i]['prob']
|
389 |
|
390 |
size = int(prob**2 * size_scale )
|
391 |
color = int(prob**2 * color_scale)
|
392 |
|
|
|
393 |
folium.CircleMarker(
|
394 |
location=[lat, lon],
|
395 |
radius=size,
|
@@ -398,8 +427,10 @@ def showing(df):
|
|
398 |
fill_color=f'#{color:02X}0000'
|
399 |
).add_to(m)
|
400 |
|
|
|
401 |
m.save("map.html")
|
402 |
|
|
|
403 |
with open("map.html", "r", encoding="utf-8") as f:
|
404 |
map_html = f.read()
|
405 |
|
|
|
16 |
dataset=None
|
17 |
|
18 |
|
19 |
+
|
20 |
def generate_human_readable(tokens,labels):
|
21 |
ret = []
|
22 |
for t,lab in zip(tokens,labels):
|
|
|
50 |
|
51 |
def getIndex(input):
|
52 |
|
|
|
|
|
53 |
|
54 |
+
tokenizer, model= getModel1()
|
|
|
|
|
55 |
|
56 |
# Tokenize input sentence
|
57 |
tokens = tokenizer.encode(input, return_tensors="pt")
|
|
|
123 |
def MLearningFormInput(input):
|
124 |
|
125 |
|
126 |
+
tokenizer,model=getModel2()
|
|
|
|
|
|
|
|
|
127 |
|
128 |
tokens = tokenizer.encode(input, return_tensors="pt")
|
129 |
|
|
|
174 |
|
175 |
def getLocationName(input_sentence):
|
176 |
# Fetch the tokenizer and token-classification model via getModel1()
|
177 |
+
tokenizer, model= getModel1()
|
178 |
|
|
|
|
|
|
|
179 |
|
180 |
# Tokenize input sentence
|
181 |
tokens = tokenizer.encode(input_sentence, return_tensors="pt")
|
|
|
268 |
|
269 |
def get50Neigbors(locationID, dataset, k=50):
|
270 |
|
271 |
+
print("neighbor part----------------------------------------------------------------")
|
272 |
+
|
273 |
input_row = dataset.loc[dataset['GeonameID'] == locationID].iloc[0]
|
274 |
|
275 |
|
276 |
lat, lon, geohash,name = input_row['Latitude'], input_row['Longitude'], input_row['Geohash'], input_row['Name']
|
277 |
|
278 |
+
filtered_dataset = dataset.loc[dataset['Geohash'].str.startswith(geohash[:7])].copy()
|
279 |
|
280 |
filtered_dataset['distance'] = filtered_dataset.apply(
|
281 |
lambda row: haversine((lat, lon), (row['Latitude'], row['Longitude']), Unit.KILOMETERS),
|
|
|
283 |
).copy()
|
284 |
|
285 |
|
286 |
+
print("neighbor end----------------------------------------------------------------")
|
287 |
+
|
288 |
+
|
289 |
+
|
290 |
filtered_dataset = filtered_dataset.sort_values(by='distance')
|
291 |
|
292 |
|
|
|
297 |
neighbors=nearest_neighbors.values.tolist()
|
298 |
|
299 |
|
300 |
+
tokenizer, model= getModel1_0()
|
301 |
|
|
|
302 |
|
303 |
sep_token_id = tokenizer.convert_tokens_to_ids(tokenizer.sep_token)
|
304 |
cls_token_id = tokenizer.convert_tokens_to_ids(tokenizer.cls_token)
|
|
|
323 |
|
324 |
#--------------------------------------------
|
325 |
|
|
|
|
|
326 |
|
327 |
tokens = torch.Tensor(neighbor_token_list).unsqueeze(0).long()
|
328 |
|
|
|
344 |
res=cutSlices(outputs.last_hidden_state, [targetIndex])
|
345 |
|
346 |
|
347 |
+
|
348 |
+
|
349 |
+
|
350 |
return res
|
351 |
|
352 |
|
|
|
370 |
|
371 |
def getCSV():
    """Read ``geohash.csv`` (relative to the working directory) into a DataFrame.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv('geohash.csv')``.
    """
    return pd.read_csv('geohash.csv')
|
374 |
|
375 |
+
@st.cache_resource
def getModel1():
    """Load the tokenizer and token-classification model for toponym recognition.

    The function is cached with ``st.cache_resource`` rather than
    ``st.cache_data``: ``cache_data`` is intended for serializable data and
    hands back a copy on every call, whereas a tokenizer/model pair is a
    shared resource that should be created once and reused as-is.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from the
        ``zekun-li/geolm-base-toponym-recognition`` checkpoint, where the
        model is an ``AutoModelForTokenClassification`` instance.
    """
    # Model name from Hugging Face model hub
    model_name = "zekun-li/geolm-base-toponym-recognition"

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForTokenClassification.from_pretrained(model_name)

    return tokenizer, model
|
386 |
+
|
387 |
+
@st.cache_resource
def getModel1_0():
    """Load the tokenizer and ``GeoLMModel`` from the toponym-recognition checkpoint.

    Cached with ``st.cache_resource`` so repeated calls reuse the same
    tokenizer and model objects instead of reloading the weights every time.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from the
        ``zekun-li/geolm-base-toponym-recognition`` checkpoint, where the
        model is a ``GeoLMModel`` instance.
    """
    # Model name from Hugging Face model hub
    model_name = "zekun-li/geolm-base-toponym-recognition"

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = GeoLMModel.from_pretrained(model_name)
    return tokenizer, model
|
395 |
+
|
396 |
+
|
397 |
+
|
398 |
+
@st.cache_resource
def getModel2():
    """Load the tokenizer and ``GeoLMModel`` from the base cased checkpoint.

    Cached with ``st.cache_resource`` so repeated calls reuse the same
    tokenizer and model objects instead of reloading the weights every time.

    Returns:
        A ``(tokenizer, model)`` tuple loaded from the
        ``zekun-li/geolm-base-cased`` checkpoint, where the model is a
        ``GeoLMModel`` instance.
    """
    model_name = "zekun-li/geolm-base-cased"

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = GeoLMModel.from_pretrained(model_name)

    return tokenizer, model
|
407 |
+
|
408 |
+
|
409 |
def showing(df):
|
410 |
|
411 |
m = folium.Map(location=[df['lat'].mean(), df['lon'].mean()], zoom_start=5)
|
412 |
|
413 |
+
size_scale = 100
|
414 |
+
color_scale = 255
|
|
|
415 |
for i in range(len(df)):
|
416 |
lat, lon, prob = df.iloc[i]['lat'], df.iloc[i]['lon'], df.iloc[i]['prob']
|
417 |
|
418 |
size = int(prob**2 * size_scale )
|
419 |
color = int(prob**2 * color_scale)
|
420 |
|
421 |
+
# Add a marker to the Folium map
|
422 |
folium.CircleMarker(
|
423 |
location=[lat, lon],
|
424 |
radius=size,
|
|
|
427 |
fill_color=f'#{color:02X}0000'
|
428 |
).add_to(m)
|
429 |
|
430 |
+
# Save the Folium map as an HTML file
|
431 |
m.save("map.html")
|
432 |
|
433 |
+
# Embed the HTML file in Streamlit
|
434 |
with open("map.html", "r", encoding="utf-8") as f:
|
435 |
map_html = f.read()
|
436 |
|