Elron committed on
Commit
80ee1d8
1 Parent(s): 1836605

Upload processors.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. processors.py +32 -2
processors.py CHANGED
@@ -1,8 +1,9 @@
1
  import json
2
  import re
3
- from typing import Any
 
4
 
5
- from .operators import FieldOperator
6
 
7
 
8
  class ToString(FieldOperator):
@@ -117,6 +118,28 @@ class Capitalize(FieldOperator):
117
  return text.capitalize()
118
 
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  class Substring(FieldOperator):
121
  begin: int = 0
122
  end: int = None
@@ -152,6 +175,13 @@ class YesNoToInt(FieldOperator):
152
  return text
153
 
154
 
 
 
 
 
 
 
 
155
  class StrToFloatFormat(FieldOperator):
156
  def process_value(self, text: Any) -> Any:
157
  try:
 
1
  import json
2
  import re
3
+ from difflib import get_close_matches
4
+ from typing import Any, Dict
5
 
6
+ from .operators import FieldOperator, InstanceFieldOperator
7
 
8
 
9
  class ToString(FieldOperator):
 
118
  return text.capitalize()
119
 
120
 
121
+ class GetStringAfter(FieldOperator):
122
+ substring: str
123
+
124
+ def process_value(self, text: Any) -> Any:
125
+ return text.split(self.substring, 1)[-1].strip()
126
+
127
+
128
+ class MatchClosestOption(InstanceFieldOperator):
129
+ options_field: str = "options"
130
+
131
+ def process_instance_value(self, value: Any, instance: Dict[str, Any]):
132
+ options = instance["task_data"][self.options_field]
133
+ return get_close_matches(value, options, n=1, cutoff=0.0)[0]
134
+
135
+
136
+ def process_instance_value(self, value, instance):
137
+ options = instance[self.options_field]
138
+ # Get the closest match; n=1 returns the single closest match
139
+ closest_match = get_close_matches(value, options, n=1, cutoff=0)
140
+ return closest_match[0] if closest_match else None
141
+
142
+
143
  class Substring(FieldOperator):
144
  begin: int = 0
145
  end: int = None
 
175
  return text
176
 
177
 
178
+ class YesToOneElseZero(FieldOperator):
179
+ def process_value(self, text: Any) -> Any:
180
+ if text == "yes":
181
+ return "1"
182
+ return "0"
183
+
184
+
185
  class StrToFloatFormat(FieldOperator):
186
  def process_value(self, text: Any) -> Any:
187
  try: