Elron committed
Commit 8977100
1 Parent(s): 212beb8

Upload metric.py with huggingface_hub

Files changed (1)
  1. metric.py +94 -38
metric.py CHANGED
@@ -1,16 +1,13 @@
-from dataclasses import field
-from typing import Any, Dict, Generator, Iterable, List, Optional, Union
+from typing import Dict, Iterable, List
 
-import datasets
 import evaluate
-from datasets import Features, Sequence, Value
+from datasets import Features, Value
 
 from .artifact import __file__ as _
 from .blocks import __file__ as _
 from .card import __file__ as _
 from .catalog import __file__ as _
 from .collections import __file__ as _
-from .common import __file__ as _
 from .dataclass import __file__ as _
 from .dict_utils import __file__ as _
 from .file_utils import __file__ as _
@@ -23,20 +20,12 @@ from .load import __file__ as _
 from .loaders import __file__ as _
 from .metrics import __file__ as _
 from .normalizers import __file__ as _
-from .operator import (
-    MultiStreamOperator,
-    SequntialOperator,
-    SequntialOperatorInitilizer,
-    StreamInitializerOperator,
-)
+from .operator import (MultiStreamOperator, SequentialOperator,
+                       SequentialOperatorInitilizer, StreamInitializerOperator)
 from .operator import __file__ as _
-from .operators import (
-    ApplyOperatorsField,
-    ApplyStreamOperatorsField,
-    FlattenInstances,
-    MergeStreams,
-    SplitByValue,
-)
+from .operators import (Apply, ApplyMetric, ApplyOperatorsField,
+                        ApplyStreamOperatorsField, FlattenInstances,
+                        MergeStreams, SplitByValue)
 from .operators import __file__ as _
 from .processors import __file__ as _
 from .random_utils import __file__ as _
@@ -44,6 +33,7 @@ from .recipe import __file__ as _
 from .register import __file__ as _
 from .register import _reset_env_local_catalogs, register_all_artifacts
 from .renderers import __file__ as _
+from .schema import UNITXT_DATASET_SCHEMA
 from .schema import __file__ as _
 from .split_utils import __file__ as _
 from .splitters import __file__ as _
@@ -75,12 +65,31 @@ class MultiStreamScoreMean(MultiStreamOperator):
             instance["score"]["global"]["groups_mean_score"] = score
             yield instance
 
+    def spread_results_one_stream(self, stream: Stream):
+        for instance in stream:
+            instance["score"]["global"]["groups_mean_score"] = instance["score"][
+                "global"
+            ]["score"]
+            yield instance
+
     def process(self, multi_stream: MultiStream) -> MultiStream:
-        mean_score = self.aggegate_results(multi_stream)
+        result = {}
+
+        # optimization in to avoid double calculation of metrics
+        # when aggregating results, if there is only one stream.
+        if len(multi_stream) == 1:
+            for stream_name, stream in multi_stream.items():
+                result[stream_name] = Stream(
+                    self.spread_results_one_stream, gen_kwargs={"stream": stream}
+                )
+            return MultiStream(result)
 
+        mean_score = self.aggegate_results(multi_stream)
         result = {}
         for stream_name, stream in multi_stream.items():
-            result[stream_name] = Stream(self.spread_results, gen_kwargs={"stream": stream, "score": mean_score})
+            result[stream_name] = Stream(
+                self.spread_results, gen_kwargs={"stream": stream, "score": mean_score}
+            )
 
         return MultiStream(result)
 
@@ -90,20 +99,41 @@ class FromPredictionsAndOriginalData(StreamInitializerOperator):
         for prediction, original in zip(predictions, references):
             yield {**original, "prediction": prediction}
 
-    def process(self, predictions: List[str], references: Iterable, split_name: str = "all") -> MultiStream:
+    def process(
+        self, predictions: List[str], references: Iterable, split_name: str = "all"
+    ) -> MultiStream:
         return MultiStream(
-            {split_name: Stream(self.zip, gen_kwargs={"predictions": predictions, "references": references})}
+            {
+                split_name: Stream(
+                    self.zip,
+                    gen_kwargs={"predictions": predictions, "references": references},
+                )
+            }
         )
 
 
-from .schema import UNITXT_DATASET_SCHEMA
+# The additional_inputs field in the schema is defined as
+# Sequence({"key": Value(dtype="string"), "value": Value("string")})
+# When receiving instances from this scheme, the keys and values are returned as two separate
+# lists, and are converted to a dictionary.
+
+
+def _from_key_value_pairs(key_value_list: Dict[str, list]) -> Dict[str, str]:
+    return dict(zip(key_value_list["key"], key_value_list["value"]))
 
 
-class MetricRecipe(SequntialOperatorInitilizer):
+class MetricRecipe(SequentialOperatorInitilizer):
+    calc_confidence_intervals: bool = True
+
     def prepare(self):
         register_all_artifacts()
         self.steps = [
             FromPredictionsAndOriginalData(),
+            Apply(
+                "additional_inputs",
+                function=_from_key_value_pairs,
+                to_field="additional_inputs",
+            ),
             ApplyOperatorsField(
                 inputs_fields=["prediction", "references"],
                 fields_to_treat_as_list=["references"],
@@ -111,37 +141,48 @@ class MetricRecipe(SequntialOperatorInitilizer):
                 default_operators=["processors.to_string_stripped"],
             ),
             SplitByValue(["group"]),
-            ApplyStreamOperatorsField(
+            ApplyMetric(
                 "metrics",
-                reversed=True,
+                calc_confidence_intervals=self.calc_confidence_intervals,
             ),
             MultiStreamScoreMean(),
             MergeStreams(),
         ]
 
 
-UNITXT_METRIC_SCHEMA = Features({"predictions": Value("string"), "references": dict(UNITXT_DATASET_SCHEMA)})
+UNITXT_METRIC_SCHEMA = Features(
+    {"predictions": Value("string"), "references": dict(UNITXT_DATASET_SCHEMA)}
+)
 
 
-def _compute(predictions: List[str], references: Iterable, flatten: bool = False, split_name: str = "all"):
+def _compute(
+    predictions: List[str],
+    references: Iterable,
+    flatten: bool = False,
+    split_name: str = "all",
+    calc_confidence_intervals: bool = True,
+):
     _reset_env_local_catalogs()
     register_all_artifacts()
-    recipe = MetricRecipe()
+    recipe = MetricRecipe(calc_confidence_intervals=calc_confidence_intervals)
 
-    multi_stream = recipe(predictions=predictions, references=references, split_name=split_name)
+    multi_stream = recipe(
+        predictions=predictions, references=references, split_name=split_name
+    )
 
     if flatten:
         operator = FlattenInstances()
        multi_stream = operator(multi_stream)
 
     stream = multi_stream[split_name]
-
     return list(stream)
 
 
 # TODO: currently we have two classes with this name. metric.Metric and matrics.Metric...
 # @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class Metric(evaluate.Metric):
+    calc_confidence_intervals: bool = True
+
     def _info(self):
         return evaluate.MetricInfo(
             description="_DESCRIPTION",
@@ -155,11 +196,16 @@ class Metric(evaluate.Metric):
            ],
        )
 
-    def _compute(self, predictions: List[str], references: Iterable, flatten: bool = False, split_name: str = "all"):
+    def _compute(
+        self,
+        predictions: List[str],
+        references: Iterable,
+        flatten: bool = False,
+        split_name: str = "all",
+    ):
         try:
-            from unitxt.dataset import (
-                get_dataset_artifact as get_dataset_artifact_installed,
-            )
+            from unitxt.dataset import \
+                get_dataset_artifact as get_dataset_artifact_installed
 
             unitxt_installed = True
         except ImportError:
@@ -169,7 +215,17 @@ class Metric(evaluate.Metric):
             from unitxt.metric import _compute as _compute_installed
 
             return _compute_installed(
-                predictions=predictions, references=references, flatten=flatten, split_name=split_name
+                predictions=predictions,
+                references=references,
+                flatten=flatten,
+                split_name=split_name,
+                calc_confidence_intervals=self.calc_confidence_intervals,
             )
-        else:
-            return _compute(predictions=predictions, references=references, flatten=flatten, split_name=split_name)
+
+        return _compute(
+            predictions=predictions,
+            references=references,
+            flatten=flatten,
+            split_name=split_name,
+            calc_confidence_intervals=self.calc_confidence_intervals,
+        )
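
Note on the new _from_key_value_pairs helper: the additional_inputs schema stores each instance's extra inputs as two parallel lists ("key" and "value"), and the helper zips them back into a single mapping. A minimal sketch of the round trip (the sample data below is illustrative, not taken from the commit):

    from typing import Dict

    def _from_key_value_pairs(key_value_list: Dict[str, list]) -> Dict[str, str]:
        # zip the parallel "key" and "value" lists back into one mapping
        return dict(zip(key_value_list["key"], key_value_list["value"]))

    # As flattened by Sequence({"key": Value("string"), "value": Value("string")}):
    flattened = {"key": ["question", "context"], "value": ["What is X?", "X is Y."]}
    assert _from_key_value_pairs(flattened) == {
        "question": "What is X?",
        "context": "X is Y.",
    }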
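Note on the single-stream shortcut in MultiStreamScoreMean.process: when there is only one stream, the mean over groups is just that stream's own global score, so spread_results_one_stream copies it instead of re-running the aggregation. A toy illustration using a plain generator in place of unitxt's Stream (assumption: instances carry the {"score": {"global": {...}}} layout shown in the diff):

    def spread_results_one_stream(stream):
        for instance in stream:
            # with a single stream, the group mean equals the stream's own score
            instance["score"]["global"]["groups_mean_score"] = instance["score"][
                "global"
            ]["score"]
            yield instance

    instances = [{"score": {"global": {"score": 0.75}}}]
    out = list(spread_results_one_stream(iter(instances)))
    assert out[0]["score"]["global"]["groups_mean_score"] == 0.75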
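Note on the new calc_confidence_intervals flag: it is a class attribute read inside Metric._compute and threaded through MetricRecipe into ApplyMetric, not a compute() keyword argument. A minimal sketch of switching it off, assuming this file is published as a Hub metric (the repo id below is an assumption, not stated in the commit):

    import evaluate

    metric = evaluate.load("unitxt/metric")  # illustrative repo id
    metric.calc_confidence_intervals = False  # attribute consulted by _compute

    # predictions are model outputs; references are unitxt dataset instances
    # (dicts following UNITXT_DATASET_SCHEMA), not plain reference strings:
    # results = metric.compute(predictions=predictions, references=references)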