Az-r-ow committed on
Commit
aba5a2f
·
1 Parent(s): db55dfb

feat(processing): option to remove stopwords or not

Browse files
app/travel_resolver/libs/nlp/data_processing.py CHANGED
@@ -38,6 +38,7 @@ def get_tagged_content(sentence: str, tag: str) -> str | None:
38
 
39
  def process_sentence(
40
  sentence: str,
 
41
  stemming: bool = False,
42
  return_tokens: bool = False,
43
  labels_to_adapt: list[int | str] | None = None,
@@ -50,6 +51,7 @@ def process_sentence(
50
 
51
  Args:
52
  sentence (str): The sentence to process.
 
53
  stemming (bool): Whether to stem the tokens.
54
  return_tokens (bool): Whether to return the tokens instead of the sentence.
55
  labels_to_adapt (list[int | str] | None): The labels to adapt.
@@ -68,7 +70,7 @@ def process_sentence(
68
 
69
  for token, label in zip(tokenized_sentence, labels_to_adapt):
70
  # Skipping stopwords
71
- if token in stopwords:
72
  continue
73
  token = token if not stemming else stemmer.stem(token)
74
  processed_sentence += token + " "
 
38
 
39
  def process_sentence(
40
  sentence: str,
41
+ rm_stopwords: bool = False,
42
  stemming: bool = False,
43
  return_tokens: bool = False,
44
  labels_to_adapt: list[int | str] | None = None,
 
51
 
52
  Args:
53
  sentence (str): The sentence to process.
54
+ rm_stopwords (bool): Whether to remove stopwords.
55
  stemming (bool): Whether to stem the tokens.
56
  return_tokens (bool): Whether to return the tokens instead of the sentence.
57
  labels_to_adapt (list[int | str] | None): The labels to adapt.
 
70
 
71
  for token, label in zip(tokenized_sentence, labels_to_adapt):
72
  # Skipping stopwords
73
+ if token in stopwords and rm_stopwords:
74
  continue
75
  token = token if not stemming else stemmer.stem(token)
76
  processed_sentence += token + " "