firobeid
/

L4_LSTM_financial_News_Headlines_generator

@@ -1,116 +1,116 @@
----
-tags:
-- text-generation
-- lstm
-- tensorflow
-library_name: tensorflow
-pipeline_tag: text-generation
----
-# LSTM Text Generation Model
-This model was trained using TensorFlow/Keras for financial article generation tasks.
-## Model Details
-- **Model Type**: LSTM
-- **Framework**: TensorFlow/Keras
-- **Task**: Text Generation
-- **Vocabulary Size**: 30000
-- **Architecture**: Bi-directional Long Short-Term Memory (LSTM)
-## Usage
-```python
-from huggingface_hub import snapshot_download
-import tensorflow as tf
-import json
-import pickle
-import numpy as np
-# Download model files
-model_path = snapshot_download(repo_id="firobeid/L4_LSTM_financial_News_Headlines_generator")
-# Load the LSTM model
-model = tf.keras.models.load_model(f"{model_path}/lstm_model")
-# Load tokenizer
-try:
-    # Try JSON format first
-    with open(f"{model_path}/tokenizer.json", 'r', encoding='utf-8') as f:
-        tokenizer_json = f.read()
-    tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(tokenizer_json)
-except FileNotFoundError:
-    # Fallback to pickle format
-    with open(f"{model_path}/tokenizer.pkl", 'rb') as f:
-        tokenizer = pickle.load(f)
-# Text generation function
-import numpy as np
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-def preprocess(texts, max_sequence_length = 71):
-    texts = '<s> %s'.format(texts.lower())
-    X = np.array(tokenizer.texts_to_sequences([texts])) # REMOVE -1
-    pad_encoded = pad_sequences(X,
-                                 maxlen= max_sequence_length,
-                                 padding='pre')
-    return pad_encoded
-def next_word(model, tokenizer,
-              text, num_gen_words=1,
-              randome_sampling = False,
-              temperature=1):
-    '''
-    Randome_Sampling : Using a categorical distribution to predict the character returned by the model
-    Low temperatures results in more predictable text.
-    Higher temperatures results in more surprising text.
-    Experiment to find the best setting.
-    '''
-    input_text = text
-    output_text = [input_text]
-    for i in range(num_gen_words):
-        X_new = preprocess(input_text)
-        if randome_sampling:
-            y_proba = model.predict(X_new, verbose = 0)[0, -1:, :]#first sentence, last token
-            rescaled_logits = tf.math.log(y_proba) / temperature
-            pred_word_ind = tf.random.categorical(rescaled_logits, num_samples=1) #REMOVE THIS + 1
-            pred_word = tokenizer.sequences_to_texts(pred_word_ind.numpy())[0]
-        else:
-            y_proba = model.predict(X_new, verbose=0)[0]  #first sentence
-            pred_word_ind = np.argmax(y_proba, axis = -1) #REMOVE THIS + 1
-            pred_word = tokenizer.index_word[pred_word_ind[-1]]
-        input_text += ' ' + pred_word
-        output_text.append(pred_word)
-        if pred_word == '</s>':
-            return ' '.join(output_text)
-    return ' '.join(output_text)
-def generate_text(model, tokenizer, text, num_gen_words=25, temperature=1, random_sampling=False):
-    return next_word(model, tokenizer, text, num_gen_words, random_sampling, temperature)
-# Example usage
-# Start with these tag: <s>, while keeping words in lower case
-generate_text(model,
-              tokenizer,
-              "Apple",
-              num_gen_words = 10,
-              random_sampling = True,
-              temperature= 10)
-```
-## Training
-This model was trained on text data using LSTM architecture for next-word prediction.
-## Limitations
-- Model performance depends on training data quality and size
-- Generated text may not always be coherent for longer sequences
-- Model architecture is optimized for the specific vocabulary it was trained on

+---
+tags:
+- text-generation
+- lstm
+- tensorflow
+library_name: tensorflow
+pipeline_tag: text-generation
+---
+# LSTM Text Generation Model
+This model was trained using TensorFlow/Keras for financial article generation tasks.
+## Model Details
+- **Model Type**: LSTM
+- **Framework**: TensorFlow/Keras
+- **Task**: Text Generation
+- **Vocabulary Size**: 30000
+- **Architecture**: Bi-directional Long Short-Term Memory (LSTM)
+## Usage
+```python
+from huggingface_hub import snapshot_download
+import tensorflow as tf
+import json
+import pickle
+import numpy as np
+# Download model files
+model_path = snapshot_download(repo_id="firobeid/L4_LSTM_financial_News_Headlines_generator")
+# Load the LSTM model
+model = tf.keras.models.load_model(f"{model_path}/lstm_model")
+# Load tokenizer
+try:
+    # Try JSON format first
+    with open(f"{model_path}/tokenizer.json", 'r', encoding='utf-8') as f:
+        tokenizer_json = f.read()
+    tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(tokenizer_json)
+except FileNotFoundError:
+    # Fallback to pickle format
+    with open(f"{model_path}/tokenizer.pkl", 'rb') as f:
+        tokenizer = pickle.load(f)
+# Text generation function
+import numpy as np
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+def preprocess(texts, max_sequence_length = 71):
+    texts = '<s> {}'.format(texts.lower())
+    X = np.array(tokenizer.texts_to_sequences([texts])) # REMOVE -1
+    pad_encoded = pad_sequences(X,
+                                 maxlen= max_sequence_length,
+                                 padding='pre')
+    return pad_encoded
+def next_word(model, tokenizer,
+              text, num_gen_words=1,
+              randome_sampling = False,
+              temperature=1):
+    '''
+    randome_sampling : If True, sample the next word from a categorical distribution over the model's predicted probabilities (rescaled by temperature); otherwise pick the most probable word.
+    Low temperatures result in more predictable text.
+    Higher temperatures result in more surprising text.
+    Experiment to find the best setting.
+    '''
+    input_text = text
+    output_text = [input_text]
+    for i in range(num_gen_words):
+        X_new = preprocess(input_text)
+        if randome_sampling:
+            y_proba = model.predict(X_new, verbose = 0)[0, -1:, :]#first sentence, last token
+            rescaled_logits = tf.math.log(y_proba) / temperature
+            pred_word_ind = tf.random.categorical(rescaled_logits, num_samples=1) #REMOVE THIS + 1
+            pred_word = tokenizer.sequences_to_texts(pred_word_ind.numpy())[0]
+        else:
+            y_proba = model.predict(X_new, verbose=0)[0]  #first sentence
+            pred_word_ind = np.argmax(y_proba, axis = -1) #REMOVE THIS + 1
+            pred_word = tokenizer.index_word[pred_word_ind[-1]]
+        input_text += ' ' + pred_word
+        output_text.append(pred_word)
+        if pred_word == '</s>':
+            return ' '.join(output_text)
+    return ' '.join(output_text)
+def generate_text(model, tokenizer, text, num_gen_words=25, temperature=1, random_sampling=False):
+    return next_word(model, tokenizer, text, num_gen_words, random_sampling, temperature)
+# Example usage
+# No need to add the <s> start tag or lower-case the prompt yourself: preprocess() does both
+generate_text(model,
+              tokenizer,
+              "Apple",
+              num_gen_words = 10,
+              random_sampling = True,
+              temperature= 10)
+```
+## Training
+This model was trained on text data using an LSTM architecture for next-word prediction.
+## Limitations
+- Model performance depends on training data quality and size
+- Generated text may not always be coherent for longer sequences
+- Model architecture is optimized for the specific vocabulary it was trained on