ServiceStack
diff --git a/‎.gitignore
+4-1 b/‎.gitignore
+4-1
diff --git a/‎.python-version
+1 b/‎.python-version
+1
diff --git a/‎makefile
+10 b/‎makefile
+10
diff --git a/‎pyproject.toml
+11 b/‎pyproject.toml
+11
diff --git a/‎scripts/01_download_txts.py
-59 b/‎scripts/01_download_txts.py
-59
diff --git a/‎scripts/02_count_tokens.py
-61 b/‎scripts/02_count_tokens.py
-61
diff --git a/‎scripts/__init__.py b/‎scripts/__init__.py
diff --git a/‎scripts/clear.py
+15 b/‎scripts/clear.py
+15
diff --git a/‎scripts/create_product_json.py
+64 b/‎scripts/create_product_json.py
+64
diff --git a/‎scripts/03_create_redirects.py renamed to ‎scripts/create_product_redirects.py
+4-10 b/‎scripts/03_create_redirects.py renamed to ‎scripts/create_product_redirects.py
+4-10
@@ -1,3 +1,6 @@
+scripts/llmstxt-files/
+scripts/run-assets/
+
 # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
 
 # dependencies
@@ -122,7 +125,7 @@ ipython_config.py
 # pyenv
 #   For a library or package, you might want to ignore these files since the code is
 #   intended to run in multiple environments; otherwise, check them in:
-.python-version
+# .python-version
 
 # pipenv
 #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 
@@ -0,0 +1 @@
+3.12
@@ -0,0 +1,10 @@
+
+# Activate the virtual environment first: source .venv/bin/activate
+
+.PHONY: run
+run:
+	python scripts/run.py
+
+.PHONY: clear
+clear:
+	python scripts/clear.py
@@ -0,0 +1,11 @@
+[project]
+name = "llmstxt-site"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "requests>=2.32.3",
+    "tiktoken>=0.8.0",
+    "tqdm>=4.67.1",
+]
@@ -0,0 +1,15 @@
+import os
+import shutil
+
+# Folders are resolved relative to this script's own location, not the
+# current working directory
+script_dir = os.path.dirname(os.path.abspath(__file__))
+
+# Names of the folders (siblings of this script) to remove
+folders_to_delete = ["llmstxt-files", "run-assets"]
+
+# Remove every listed folder that is present; missing ones are skipped quietly
+for name in folders_to_delete:
+    target = os.path.join(script_dir, name)
+    if not os.path.exists(target):
+        continue
+    shutil.rmtree(target)
+    print(f"Deleted folder: {name}")
@@ -0,0 +1,64 @@
+import requests
+import os
+
+import tiktoken
+
+enc = tiktoken.get_encoding("o200k_base")
+# To get the tokeniser corresponding to a specific model in the OpenAI API
+# enc = tiktoken.encoding_for_model("gpt-4o")
+disallowed_special = enc.special_tokens_set - {"<|endoftext|>"}
+
+
+def create_product_json(product: dict, product_dir: str, timeout: float = 30.0) -> dict:
+    """Download a product's llms.txt files and build its JSON summary.
+
+    Each URL found under the ``llms-txt`` / ``llms-full-txt`` keys of
+    ``product`` is fetched, tokenised with the module-level ``enc`` encoder
+    and saved as ``<key>.txt`` inside ``product_dir``. The contents of all
+    files that were saved successfully are also concatenated into
+    ``combined.txt`` in the same directory.
+
+    Args:
+        product: Entry with ``product`` and ``website`` keys and, optionally,
+            ``llms-txt`` / ``llms-full-txt`` URLs. Missing or empty values
+            are skipped.
+        product_dir: Directory the files are written to; it is not created
+            here.
+        timeout: Seconds to wait on the server before giving up on a URL.
+
+    Returns:
+        A dict with ``product``, ``website`` and, for each of the two file
+        kinds, its URL and ``<key>-tokens`` count. A file that could not be
+        downloaded, tokenised or saved gets ``""`` and ``None`` instead.
+    """
+    product_name = product["product"]
+    product_json = {"product": product_name, "website": product["website"]}
+
+    # Keep only the file kinds for which the product actually lists a URL.
+    files = {
+        key: product[key]
+        for key in ("llms-txt", "llms-full-txt")
+        if product.get(key)
+    }
+
+    file_contents = {}
+    for filename, url in files.items():
+        filepath = os.path.join(product_dir, f"{filename}.txt")
+        try:
+            # Without a timeout a stalled server would block the run forever.
+            response = requests.get(url, timeout=timeout)
+            response.raise_for_status()
+            content = response.text
+
+            # Tokenise before opening the file so that a tokeniser failure
+            # does not leave an empty file in the product directory.
+            tokens = enc.encode(content, disallowed_special=disallowed_special)
+
+            with open(filepath, "w", encoding="utf-8") as f:
+                f.write(content)
+        except requests.RequestException as e:
+            print(f"Error downloading {filename} for {product_name}: {e}")
+            continue
+        except Exception as e:
+            # Best effort: one bad file must not abort the remaining ones.
+            print(f"Error processing {filename} for {product_name}: {e}")
+            continue
+
+        # Record the file only once it has been fetched, counted and saved.
+        product_json[filename] = url
+        product_json[f"{filename}-tokens"] = len(tokens)
+        file_contents[filename] = content
+        print(f"Downloaded {filename} for {product_name}")
+
+    # Every result carries both file kinds, with placeholders when absent.
+    for key in ("llms-full-txt", "llms-txt"):
+        if key not in product_json:
+            product_json[key] = ""
+            product_json[f"{key}-tokens"] = None
+
+    # Create combined file with all downloaded content
+    if file_contents:
+        combined_filepath = os.path.join(product_dir, "combined.txt")
+        with open(combined_filepath, "w", encoding="utf-8") as f:
+            for content in file_contents.values():
+                f.write(content)
+                f.write("\n\n")
+        print(f"Created combined file for {product_name}")
+
+    return product_json
@@ -1,9 +1,5 @@
-import json
-
-data = json.load(open("data_raw.json"))
-
-redirects = []
-for product in data:
+def create_product_redirects(product: dict):
+    redirects = []
     source_path = product["product"].lower().replace(" ", "-").replace(".", "-")
     destination_path = "#"
     if product["llms-full-txt"]:
@@ -12,7 +8,7 @@
         destination_path = product["llms-txt"]
     else:
         print(f"No destination path for {product['product']}")
-        continue
+        return redirects
 
     redirects.append(
         {
@@ -40,6 +36,4 @@
             }
         )
 
-
-with open("redirects.json", "w") as f:
-    json.dump({"redirects": redirects}, f, indent=4)
+    return redirects