diff --git a/README.md b/README.md
index 3c4ab7b..26e6d21 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,9 @@ huak run main
 
 # pre-processing data
 huak run preproc
+
+# detect AI-generated data
+huak run detect
 ```
 
 ## Format
diff --git a/output/embeddings_w_lsh.csv b/output/preprocessed_data.csv
similarity index 100%
rename from output/embeddings_w_lsh.csv
rename to output/preprocessed_data.csv
diff --git a/pyproject.toml b/pyproject.toml
index 41344f8..5c6dc6b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,3 +23,4 @@ dev = [
 [tool.huak.task]
 main = "python3 src/semantic_hashing_demo/main.py"
 preproc = "python3 src/semantic_hashing_demo/preprocessing.py"
+detect = "python3 src/semantic_hashing_demo/detection.py"
diff --git a/src/semantic_hashing_demo/config.py b/src/semantic_hashing_demo/config.py
index 7f692da..5a9d60e 100644
--- a/src/semantic_hashing_demo/config.py
+++ b/src/semantic_hashing_demo/config.py
@@ -12,6 +12,8 @@
 # So, parse accordingly depending on the computational resources for bucketing.
 data_file = "./data/fine_food_reviews_1k.csv"
 
+preprocessed_data_file = "./output/preprocessed_data.csv"
+
 # no. of text samples
 n = 20
 
diff --git a/src/semantic_hashing_demo/detection.py b/src/semantic_hashing_demo/detection.py
new file mode 100644
index 0000000..21d5646
--- /dev/null
+++ b/src/semantic_hashing_demo/detection.py
@@ -0,0 +1,24 @@
+"""
+The end goal is to detect AI-generated text.
+"""
+import polars as pl
+from config import preprocessed_data_file
+
+
+def main():
+    # Specify data types for multiple columns to be read as strings
+    dtype_spec = {
+        "Hash 8-bit": str,
+        "Hash 16-bit": str,
+        "Hash 32-bit": str,
+        "Hash 64-bit": str,
+        "Hash 128-bit": str,
+    }
+
+    # pull data into polars dataframe
+    df = pl.read_csv(preprocessed_data_file, dtypes=dtype_spec)
+    print(df.head())
+
+
+if __name__ == "__main__":
+    main()