TymaaHammouda committed on
Commit
6125895
·
1 Parent(s): c479870

Update entities

Browse files
Files changed (2) hide show
  1. IBO_to_XML.py +52 -52
  2. app.py +2 -2
IBO_to_XML.py CHANGED
@@ -75,61 +75,61 @@ def IBO_to_XML(temp):
75
  return xml_output.strip()
76
 
77
 
78
- def sortTags(entities):
 
79
  temp_counter = 0
80
  # For each entity, this loop sorts the entity's tags: first it checks whether the
81
  # previous entity's tags contain the same number of this tag, then it sorts the tags and checks that they are correct
82
- for temp_entities in entities:
83
- for entity in temp_entities[1]:
84
- tags = entity.split()
85
- for tag in tags:
86
- # if the counter is not 0 then, will complete
87
- if temp_counter != 0:
88
- # Check if this tag is equal I-, if yes then it will count how many tag in this tags and
89
- # count how many tag in previous tags
90
- if "I-" == tag[0:2]:
91
- counter_of_this_tag = 0
92
- counter_of_previous_tag = 0
93
- for word in tags:
94
- if tag.split("-")[1] in word:
95
- counter_of_this_tag+=1
96
- for word in entity[temp_counter-1][1].split():
97
- if tag.split("-")[1] in word:
98
- counter_of_previous_tag+=1
99
- # if the counter of previous tag is bigger than counter of this tag, then we
100
- # need to add I-tag in this tags
101
- if counter_of_previous_tag > counter_of_this_tag:
102
- tags.append("I-"+tag.split("-")[1])
103
- # Sort the tags
104
- tags.sort()
105
- # Reverse the tags because the list should begin with the I- tags
106
- tags.reverse()
107
- # If the counter is not 0 then we can complete
108
  if temp_counter != 0:
109
- this_tags = tags
110
- previous_tags = entity[temp_counter - 1][1].split()
111
- sorted_tags = list()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
- # Continue only when neither the current tags nor the previous tags contain O;
114
- # otherwise the alignment step is skipped for these tags
115
- if "O" not in this_tags and "O" not in previous_tags:
116
- index = 0
117
- # For each previous tag, move the matching I- tag to the same position; stop scanning at the first B- tag
118
- for i in previous_tags:
119
- j = 0
120
- while this_tags and j < len(this_tags):
121
- if this_tags[j][0:2] == "I-" and this_tags[j][2:] == i[2:]:
122
- sorted_tags.insert(index, this_tags.pop(j))
123
- break
124
- elif this_tags[j][0:2] == "B-":
125
- break
126
- j += 1
127
- index += 1
128
- sorted_tags += this_tags
129
- tags = sorted_tags
130
- str_tag = " "
131
- str_tag = str_tag.join(tags)
132
- str_tag = str_tag.strip()
133
- temp_entities[temp_counter][1] = str_tag
134
- temp_counter += 1
135
  return temp_entities
 
75
  return xml_output.strip()
76
 
77
 
78
def sortTags(entities):
    """Reorder every entry's tags so they line up with the preceding entry.

    For each entry after the first, the function
      1. pads the tag list with extra ``I-<label>`` tags when the preceding
         entry mentions ``<label>`` more often than the current one,
      2. sorts the tags in descending string order (so ``I-`` tags come
         before ``B-`` tags), and
      3. when neither entry carries an ``O`` tag, moves each ``I-`` tag to
         the position of the tag with the same label in the preceding entry.

    Args:
        entities: list of mutable sequences whose item at index 1 is a
            space-separated string of tags such as ``"B-ORG I-GPE"`` or
            ``"O"``. Only index 1 is read and rewritten.

    Returns:
        The same ``entities`` list object; each entry's tag string has been
        rewritten in place.
    """
    temp_entities = entities  # alias, not a copy: the caller's list is updated
    for position, entity in enumerate(temp_entities):
        tags = entity[1].split()
        # The preceding entry was already rewritten by the previous iteration,
        # so its tags are read in their final (aligned) order.
        previous_tags = temp_entities[position - 1][1].split() if position else []

        if position:
            # Index-based scan on purpose: tags appended below must be visited
            # too, so padding continues until the counts are balanced.
            scan = 0
            while scan < len(tags):
                tag = tags[scan]
                scan += 1
                if tag[0:2] != "I-":
                    continue
                label = tag.split("-")[1]
                # Substring matching counts both B-<label> and I-<label>.
                count_here = sum(1 for word in tags if label in word)
                count_before = sum(1 for word in previous_tags if label in word)
                if count_before > count_here:
                    tags.append("I-" + label)

        # Descending order puts "O" first, then I- tags, then B- tags.
        tags.sort(reverse=True)

        if position and "O" not in tags and "O" not in previous_tags:
            aligned = []
            for slot, previous_tag in enumerate(previous_tags):
                for offset, candidate in enumerate(tags):
                    if candidate[0:2] == "B-":
                        # B- tags sort last; nothing left to align for this slot.
                        break
                    if candidate[0:2] == "I-" and candidate[2:] == previous_tag[2:]:
                        aligned.insert(slot, tags.pop(offset))
                        break
            # Tags with no counterpart in the preceding entry keep sorted order.
            tags = aligned + tags

        entity[1] = " ".join(tags)
    return temp_entities
app.py CHANGED
@@ -148,9 +148,9 @@ def extract(sentence):
148
  list_of_tags = [i for i in list_of_tags if i not in ("O", " ", "")]
149
 
150
  if not list_of_tags:
151
- item["tags"] = ["O"]
152
  else:
153
- item["tags"] = list_of_tags
154
  lists.append(item)
155
  return lists
156
 
 
148
  list_of_tags = [i for i in list_of_tags if i not in ("O", " ", "")]
149
 
150
  if not list_of_tags:
151
+ item["tags"] = "O"
152
  else:
153
+ item["tags"] = " ".join(list_of_tags)
154
  lists.append(item)
155
  return lists
156