| from inputs.fields.field import Field | |
| import logging | |
| logger = logging.getLogger(__name__) | |
class RawTokenField(Field):
    """Field that preserves the raw text of tokens.

    Unlike a vocabulary-backed field, this one never maps tokens to ids:
    `count_vocab_items` is a no-op and `index` copies the tokens through
    unchanged.
    """

    def __init__(self, namespace, source_key):
        """Set the namespace of the field and the dataset source key.

        Arguments:
            namespace {str} -- namespace of the field; used as the key
                written to in the instance during `index`
            source_key {str} -- key under which each sentence stores
                its tokens
        """
        super().__init__()
        # Both identifiers are coerced to str before being stored.
        self.namespace = str(namespace)
        self.source_key = str(source_key)

    def count_vocab_items(self, counter, sentences):
        """Do nothing: `RawTokenField` doesn't update the counter.

        Arguments:
            counter {dict} -- counter (left untouched)
            sentences {list} -- text content after preprocessing (unused)
        """

    def index(self, instance, vocab, sentences):
        """Append the raw tokens of every sentence to the instance.

        The vocabulary is not used; tokens are preserved as they are.
        For each sentence, a new list built from
        `sentence[self.source_key]` is appended to
        `instance[self.namespace]`. One INFO record is logged after all
        sentences have been processed.

        Arguments:
            instance {dict} -- numerical representation of text data;
                `instance[self.namespace]` must already hold a list
                (anything with `append`), otherwise the lookup fails
            vocab {Vocabulary} -- vocabulary (unused)
            sentences {list} -- text content after preprocessing; each
                item is indexed with `self.source_key`
        """
        for sentence in sentences:
            # list() makes a shallow copy, so the stored entry is a new
            # list object rather than the sentence's own token container.
            instance[self.namespace].append(list(sentence[self.source_key]))

        logger.info("Index sentences {} to construct instance namespace {} successfully.".format(
            self.source_key, self.namespace))