asiansoul committed on
Commit
364b5d4
•
1 Parent(s): b0302cd

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +177 -1
README.md CHANGED
@@ -80,7 +80,7 @@ PARAMETER stop "<s>"
80
  PARAMETER stop "</s>"
81
  ```
82
 
83
- ## 💻 Ollama Python Summarizing Test Code
84
 
85
  install all of these libraries
86
  ```
@@ -187,6 +187,182 @@ python pose_test.py url
187
 
188
  You can find both test results below in the section: Test Result
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  ### 🗞️ Configuration
191
  The YAML configuration for this model:
192
 
 
80
  PARAMETER stop "</s>"
81
  ```
82
 
83
+ ## 💻 Ollama Python Summarizing Normal Test Code
84
 
85
  install all of these libraries
86
  ```
 
187
 
188
  You can find both test results below in the section: Test Result
189
 
190
+ ## 💻 Ollama Python Summarizing Test Code for the target lang response
191
+
192
+ install all of these libraries
193
+ ```
194
+ pip install requests beautifulsoup4 PyPDF2 tqdm googletrans==4.0.0-rc1 langchain-community langchain
195
+ ```
196
+
197
+ pose_lang.py
198
+ ```
199
+ import sys
200
+ import os
201
+ import requests
202
+ from bs4 import BeautifulSoup
203
+ import PyPDF2
204
+ from tqdm import tqdm
205
+ from langchain_community.chat_models import ChatOllama
206
+ from langchain.schema import AIMessage, HumanMessage, SystemMessage
207
+ from googletrans import Translator
208
+ import logging
209
+
210
# Configure the root logger at import time: DEBUG level, with records
# formatted as "<timestamp> - <LEVEL> - <message>".
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
212
+
213
def clean_output(text):
    """Remove end-of-sequence markers from model output and trim whitespace.

    Args:
        text: Raw text returned by the model.

    Returns:
        ``text`` with every literal ``</s>`` removed and leading/trailing
        whitespace stripped.
    """
    return text.replace("</s>", "").strip()
216
+
217
def translate_text(text, src_lang, dest_lang):
    """Translate ``text`` from ``src_lang`` to ``dest_lang`` using Google Translate.

    Args:
        text: Text to translate.
        src_lang: Language code of ``text``. ``None`` or an empty string asks
            the translator to auto-detect the source language.
        dest_lang: Language code to translate into.

    Returns:
        The translated text. ``text`` is returned unchanged when it is empty,
        when both language codes are equal, or when the translation call
        raises (the failure is logged).
    """
    # Nothing to do: skip the remote call entirely.
    if not text or src_lang == dest_lang:
        return text
    translator = Translator()
    try:
        # detect_language() returns None on failure; 'auto' keeps the request
        # valid instead of failing on an invalid source language.
        translation = translator.translate(text, src=src_lang or 'auto', dest=dest_lang)
        return translation.text
    except Exception as e:
        # Best effort: an untranslated summary is more useful than none.
        logging.error(f"Translation failed: {e}")
        return text
228
+
229
def detect_language(text):
    """Detect the language of ``text`` using Google Translate.

    Args:
        text: Text whose language should be detected.

    Returns:
        The detected language code, or ``None`` when ``text`` is empty or
        whitespace-only, or when detection raises (the failure is logged).
    """
    # There is nothing to detect in blank input, so avoid the remote call.
    if not text or not text.strip():
        return None
    translator = Translator()
    try:
        return translator.detect(text).lang
    except Exception as e:
        logging.error(f"Language detection failed: {e}")
        return None
238
+
239
def invoke_model(text, target_lang):
    """Ask the ``pose:latest`` Ollama chat model for a summary in ``target_lang``.

    A Korean prompt is used when ``target_lang`` is ``'ko'``; every other
    value gets the English prompt. The reply is cleaned with ``clean_output``
    and, if its detected language differs from ``target_lang``, translated
    with ``translate_text``.

    Args:
        text: Document text to summarize.
        target_lang: Language code the summary should be written in.

    Returns:
        The summary text, or the string ``"Model invocation failed."`` when
        any step raises (the error is logged).
    """
    llm = ChatOllama(model="pose:latest")
    try:
        # Define messages based on target language
        if target_lang == 'ko':
            messages = [
                SystemMessage(content='문서의 핵심 요약을 상세하게 제공해 주실 전문가로서, 다음 문서를 요약해 주세요.'),
                HumanMessage(content=f'다음 텍스트에 대한 전문적 요약을 제공해 주세요. 요약은 {target_lang}어의 언어적 뉘앙스에 맞게 최고 수준의 명확성과 세부 사항을 준수해야 합니다:\n\nTEXT: {text}'),
            ]
        else:  # default to English if not Korean
            messages = [
                SystemMessage(content='As an adept summarizer, your expertise is required to condense the following document into its essential points in detail.'),
                HumanMessage(content=f'Kindly provide an expert summary of the text below, adhering to the highest standards of clarity and detail. Ensure the response is tailored to the linguistic nuances of {target_lang}:\n\nTEXT: {text}'),
            ]

        response = llm.invoke(messages)
        if not isinstance(response, AIMessage):
            raise ValueError("Model did not return an AIMessage")

        cleaned_content = clean_output(response.content)
        content_lang = detect_language(cleaned_content)
        if content_lang != target_lang:
            # detect_language() yields None on failure; fall back to 'auto'
            # so the translator detects the source language itself.
            return translate_text(cleaned_content, content_lang or 'auto', target_lang)
        return cleaned_content
    except Exception as e:
        logging.error(f"Error during model invocation: {e}")
        return "Model invocation failed."
267
+
268
def fetch_text_from_url(url):
    """Download a web page and return the text of its paragraphs and list items.

    Only the first element matching ``#mw-content-text``, ``#bodyContent`` or
    ``.content`` is searched.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text of all ``<p>`` and ``<li>`` elements in that container joined
        by single spaces, or ``None`` when the container is missing or the
        request/parsing raises (the problem is logged).
    """
    try:
        # requests has no default timeout; without one a stalled server would
        # hang the script indefinitely.
        response = requests.get(url, timeout=30)
        # Do not try to summarize an HTTP error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        content = soup.select_one('#mw-content-text, #bodyContent, .content')
        if not content:
            logging.error("No content found in the expected sections.")
            return None
        # No string=True filter here: it only matches tags whose .string is
        # set, which excludes paragraphs containing links or other inline
        # markup and silently drops most of the article body.
        return ' '.join(tag.get_text() for tag in content.find_all(['p', 'li']))
    except Exception as e:
        logging.error(f"Error fetching URL content: {e}")
        return None
282
+
283
def read_text_file(file_path):
    """Read a UTF-8 text file and return its full contents.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file contents as a string, or ``None`` when the file cannot be
        opened or decoded (the error is logged).
    """
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except Exception as e:
        logging.error(f"Error reading text file: {e}")
        return None
292
+
293
def read_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        The non-empty page texts joined by single spaces, or ``None`` when the
        file cannot be opened or parsed (the error is logged).
    """
    try:
        with open(file_path, "rb") as file:
            reader = PyPDF2.PdfReader(file)
            # Extract each page once; the generator feeds the filter below so
            # no page is parsed twice.
            page_texts = (page.extract_text() for page in reader.pages)
            return ' '.join(page_text for page_text in page_texts if page_text)
    except Exception as e:
        logging.error(f"Error reading PDF file: {e}")
        return None
303
+
304
def summarize_content(source, language):
    """Summarize a URL, PDF or text file and print the summary.

    Args:
        source: An ``http://``/``https://`` URL, or the path of an existing
            ``.pdf``, ``.txt`` or ``.text`` file (extension is matched
            case-insensitively).
        language: Target language code forwarded to ``invoke_model``.

    Returns:
        None. Progress messages, problems and the summary are printed to
        stdout.
    """
    print("Processing input...")
    if source.startswith(('http://', 'https://')):
        print("Fetching content from URL...")
        text_content = fetch_text_from_url(source)
    elif os.path.isfile(source):
        extension = os.path.splitext(source)[1].lower()
        if extension == '.pdf':
            print("Reading PDF...")
            text_content = read_pdf(source)
        elif extension in ('.txt', '.text'):
            print("Reading text file...")
            text_content = read_text_file(source)
        else:
            print("Unsupported file type")
            return
    else:
        # This branch means the path does not exist (or is not a regular
        # file); reporting a file-type problem here would be misleading.
        print("Source is not a URL or an existing file")
        return

    if not text_content:
        print("No text found or unable to extract text from source.")
        return

    print("Summarizing content...")
    summary = invoke_model(text_content, language)
    print("\n--- Summary of the document ---\n")
    print(summary)
333
+
334
if __name__ == '__main__':
    # Command-line entry point: <source> <language>. Extra arguments are ignored.
    if len(sys.argv) < 3:
        # summarize_content() accepts only URLs and existing files, so the
        # usage line does not advertise raw text input.
        print("Usage: python script.py <file_path_or_url> <language>")
        print("Language should be 'ko' for Korean or 'en' for English.")
    else:
        source = sys.argv[1]
        language = sys.argv[2]
        summarize_content(source, language)
342
+
343
+
344
+ ```
345
+
346
+ run txt file (assume txt is a.txt)
347
+ ```
348
+ Korean response : python pose_lang.py a.txt ko
349
+ English response : python pose_lang.py a.txt en
350
+ ```
351
+
352
+ run pdf file (assume pdf is a.pdf)
353
+ ```
354
+ Korean response : python pose_lang.py a.pdf ko
355
+ English response : python pose_lang.py a.pdf en
356
+ ```
357
+
358
+ run url (assume url is a Wikipedia page)
359
+ ```
360
+ Korean response : python pose_lang.py url ko
361
+ English response : python pose_lang.py url en
362
+ ```
363
+
364
+ You can find both test results below in the section: Test Result for target lang response
365
+
366
  ### 🗞️ Configuration
367
  The YAML configuration for this model:
368