Update README.md
Browse files
README.md
CHANGED
@@ -80,7 +80,7 @@ PARAMETER stop "<s>"
|
|
80 |
PARAMETER stop "</s>"
|
81 |
```
|
82 |
|
83 |
-
## 💻 Ollama Python Summarizing Test Code
|
84 |
|
85 |
install all of these libraries
|
86 |
```
|
@@ -187,6 +187,182 @@ python pose_test.py url
|
|
187 |
|
188 |
You can find both test results below in the section: Test Result
|
189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
### 🛠️ Configuration
|
191 |
The YAML configuration for this model:
|
192 |
|
|
|
80 |
PARAMETER stop "</s>"
|
81 |
```
|
82 |
|
83 |
+
## 💻 Ollama Python Summarizing Normal Test Code
|
84 |
|
85 |
install all of these libraries
|
86 |
```
|
|
|
187 |
|
188 |
You can find both test results below in the section: Test Result
|
189 |
|
190 |
+
## 💻 Ollama Python Summarizing Test Code for the target lang response
|
191 |
+
|
192 |
+
install all of these libraries
|
193 |
+
```
|
194 |
+
pip install requests beautifulsoup4 PyPDF2 tqdm googletrans==4.0.0-rc1 langchain-community langchain
|
195 |
+
```
|
196 |
+
|
197 |
+
pose_lang.py
|
198 |
+
```
|
199 |
+
import sys
|
200 |
+
import os
|
201 |
+
import requests
|
202 |
+
from bs4 import BeautifulSoup
|
203 |
+
import PyPDF2
|
204 |
+
from tqdm import tqdm
|
205 |
+
from langchain_community.chat_models import ChatOllama
|
206 |
+
from langchain.schema import AIMessage, HumanMessage, SystemMessage
|
207 |
+
from googletrans import Translator
|
208 |
+
import logging
|
209 |
+
|
210 |
+
# Configure the root logger: emit records at DEBUG level and above, formatted as "timestamp - level - message".
|
211 |
+
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
212 |
+
|
213 |
+
def clean_output(text):
    """Return *text* with every ``</s>`` marker removed and surrounding whitespace trimmed."""
    return text.replace("</s>", "").strip()
|
216 |
+
|
217 |
+
def translate_text(text, src_lang, dest_lang):
    """Translate *text* from *src_lang* to *dest_lang* using Google Translate.

    Args:
        text: The text to translate.
        src_lang: Language code of *text*. ``None`` or an empty string asks
            googletrans to auto-detect the source language.
        dest_lang: Language code to translate into.

    Returns:
        The translated text. *text* is returned unchanged when it is empty,
        when source and destination languages are equal, or when the
        translation request fails (the failure is logged).
    """
    # Nothing to translate: skip the network round-trip entirely.
    if not text or src_lang == dest_lang:
        return text
    translator = Translator()
    try:
        # An unknown source language (e.g. after a failed detection) falls
        # back to 'auto' instead of sending ``src=None`` to the translator.
        translation = translator.translate(text, src=src_lang or 'auto', dest=dest_lang)
        return translation.text
    except Exception as e:
        logging.error(f"Translation failed: {e}")
        return text
|
228 |
+
|
229 |
+
def detect_language(text):
    """Detect the language of *text* using Google Translate.

    Args:
        text: The text whose language should be identified.

    Returns:
        The detected language code, or ``None`` when *text* is empty or
        whitespace-only, or when the detection request fails (the failure is
        logged).
    """
    # Blank input carries no language signal; avoid a pointless remote call.
    if not text or not text.strip():
        return None
    translator = Translator()
    try:
        return translator.detect(text).lang
    except Exception as e:
        logging.error(f"Language detection failed: {e}")
        return None
|
238 |
+
|
239 |
+
def invoke_model(text, target_lang):
    """Summarize *text* with the ``pose:latest`` Ollama chat model.

    A Korean prompt is used when *target_lang* is ``'ko'``; every other value
    uses the English prompt. The ``</s>`` markers are stripped from the reply,
    and the reply is translated to *target_lang* when its detected language
    differs.

    Args:
        text: The document text to summarize.
        target_lang: Language code the summary should be written in.

    Returns:
        The summary string, or the literal ``"Model invocation failed."`` when
        anything goes wrong while calling the model (the error is logged).
    """
    llm = ChatOllama(model="pose:latest")
    try:
        # Define messages based on target language
        if target_lang == 'ko':
            # NOTE(review): the Korean prompt wording below was restored from
            # mis-encoded text; confirm it against the intended prompt.
            messages = [
                SystemMessage(content='문서의 핵심 요약을 자세하게 제공해 주실 전문가로서, 다음 문서를 요약해 주세요.'),
                HumanMessage(content=f'다음 텍스트에 대한 전문적 요약을 제공해 주세요. 요약은 {target_lang}어의 언어적 뉘앙스에 맞게 최고 수준의 명확성과 세부 사항을 준수해야 합니다:\n\nTEXT: {text}')
            ]
        else:  # default to English if not Korean
            messages = [
                SystemMessage(content='As an adept summarizer, your expertise is required to condense the following document into its essential points in detail.'),
                HumanMessage(content=f'Kindly provide an expert summary of the text below, adhering to the highest standards of clarity and detail. Ensure the response is tailored to the linguistic nuances of {target_lang}:\n\nTEXT: {text}')
            ]

        response = llm.invoke(messages)
        if not isinstance(response, AIMessage):
            raise ValueError("Model did not return an AIMessage")

        cleaned_content = clean_output(response.content)
        content_lang = detect_language(cleaned_content)
        # Detection failed: there is no usable source language to translate
        # from, so return the model output as-is rather than passing None on.
        if content_lang is None:
            return cleaned_content
        # Compare case-insensitively so codes such as 'zh-CN' and 'zh-cn' match.
        if content_lang.lower() != target_lang.lower():
            return translate_text(cleaned_content, content_lang, target_lang)
        return cleaned_content
    except Exception as e:
        logging.error(f"Error during model invocation: {e}")
        return "Model invocation failed."
|
267 |
+
|
268 |
+
def fetch_text_from_url(url, timeout=30):
    """Fetch *url* and extract the text of its main content section.

    The first element matching ``#mw-content-text``, ``#bodyContent`` or
    ``.content`` is used, and the text of its ``<p>`` and ``<li>`` elements is
    joined with single spaces.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The extracted text, or ``None`` when the request fails, the server
        answers with an HTTP error status, or no content section is found
        (each case is logged).
    """
    try:
        # A timeout keeps an unresponsive server from hanging the script.
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx replies as failures instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        content = soup.select_one('#mw-content-text, #bodyContent, .content')
        if not content:
            logging.error("No content found in the expected sections.")
            return None
        pieces = []
        for node in content.find_all(['p', 'li']):
            # Do not filter with string=True: that keeps only tags with a
            # single string child and drops paragraphs containing links or
            # other inline markup.
            # Skip nodes nested in another <p>/<li>; the outer node's
            # get_text() already includes their text.
            if node.find_parent(['p', 'li']) is not None:
                continue
            piece = node.get_text().strip()
            if piece:
                pieces.append(piece)
        return ' '.join(pieces)
    except Exception as e:
        logging.error(f"Error fetching URL content: {e}")
        return None
|
282 |
+
|
283 |
+
def read_text_file(file_path):
    """Return the entire contents of the UTF-8 text file at *file_path*.

    Any error raised while opening or reading the file is logged and ``None``
    is returned instead.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            return handle.read()
    except Exception as err:
        logging.error(f"Error reading text file: {err}")
        return None
|
292 |
+
|
293 |
+
def read_pdf(file_path):
    """Extract the text of every page of the PDF at *file_path*.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The text of all pages that yielded any text, joined with single
        spaces, or ``None`` when the file cannot be opened or parsed (the
        error is logged).
    """
    try:
        with open(file_path, "rb") as file:
            reader = PyPDF2.PdfReader(file)
            # Extract each page exactly once rather than once for the filter
            # and again for the join.
            page_texts = (page.extract_text() for page in reader.pages)
            return ' '.join(page_text for page_text in page_texts if page_text)
    except Exception as e:
        logging.error(f"Error reading PDF file: {e}")
        return None
|
303 |
+
|
304 |
+
def summarize_content(source, language):
    """Summarize *source* and print the summary in *language*.

    *source* is interpreted, in order, as:

    * a URL when it starts with ``http://`` or ``https://``;
    * a file when it names an existing ``.pdf``, ``.txt`` or ``.text`` file
      (any other existing file is rejected as unsupported);
    * raw text to summarize otherwise.

    Args:
        source: URL, file path, or literal text to summarize.
        language: Language code for the summary, passed to ``invoke_model``.

    Returns:
        ``None``. Progress messages and the summary are printed to stdout.
    """
    print("Processing input...")
    text_content = None
    if source.startswith(('http://', 'https://')):
        print("Fetching content from URL...")
        text_content = fetch_text_from_url(source)
    elif os.path.isfile(source):
        file_extension = os.path.splitext(source)[1].lower()
        if file_extension == '.pdf':
            print("Reading PDF...")
            text_content = read_pdf(source)
        elif file_extension in ('.txt', '.text'):
            print("Reading text file...")
            text_content = read_text_file(source)
        else:
            print("Unsupported file type")
            return
    else:
        # Neither a URL nor an existing file: use the argument itself as the
        # text, as the docstring and command-line usage promise.
        print("Treating input as raw text...")
        text_content = source

    if text_content:
        print("Summarizing content...")
        summary = invoke_model(text_content, language)
        print("\n--- Summary of the document ---\n")
        print(summary)
    else:
        print("No text found or unable to extract text from source.")
|
333 |
+
|
334 |
+
if __name__ == '__main__':
    # Command-line entry point: expects a source (URL, file path or text)
    # followed by the language code for the summary.
    if len(sys.argv) < 3:
        # Report misuse on stderr under the script's real name, and exit
        # non-zero so calling shells and scripts can detect the failure.
        print(f"Usage: python {os.path.basename(sys.argv[0])} <file_path_or_url_or_text> <language>", file=sys.stderr)
        print("Language should be 'ko' for Korean or 'en' for English.", file=sys.stderr)
        sys.exit(1)
    summarize_content(sys.argv[1], sys.argv[2])
|
342 |
+
|
343 |
+
|
344 |
+
```
|
345 |
+
|
346 |
+
Run with a text file (assuming the file is a.txt):
|
347 |
+
```
|
348 |
+
Korean response : python pose_lang.py a.txt ko
|
349 |
+
English response : python pose_lang.py a.txt en
|
350 |
+
```
|
351 |
+
|
352 |
+
Run with a PDF file (assuming the file is a.pdf):
|
353 |
+
```
|
354 |
+
Korean response : python pose_lang.py a.pdf ko
|
355 |
+
English response : python pose_lang.py a.pdf en
|
356 |
+
```
|
357 |
+
|
358 |
+
Run with a URL (assuming the URL is a Wikipedia page):
|
359 |
+
```
|
360 |
+
Korean response : python pose_lang.py url ko
|
361 |
+
English response : python pose_lang.py url en
|
362 |
+
```
|
363 |
+
|
364 |
+
You can find both test results below in the section: Test Result for target lang response
|
365 |
+
|
366 |
### 🛠️ Configuration
|
367 |
The YAML configuration for this model:
|
368 |
|