Sashrika Das

NoteKit

A study tool that turns students' hand-written notes into flashcards and quizzes, making studying a little more interesting!

Intermediate · Full instructions provided · Over 1 day · 302 views

Things used in this project

Story


Code

ocr.py

Python
import base64
import os
from dotenv import load_dotenv
from openai import OpenAI
import cv2
import PIL.Image as PILImage
from io import BytesIO
import numpy as np

load_dotenv()

api_key = os.getenv("GEMINI_API_KEY")

print(f"API Key: {api_key}")

client = OpenAI(
    api_key=api_key,
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

# Function to encode the image
# def encode_image(image_path):
#   with open(image_path, "rb") as image_file:
#     return base64.b64encode(image_file.read()).decode('utf-8')
  
def preprocess_image(image_path):
    img = cv2.imread(image_path)

    # normalize the pixel intensities to the full 0-255 range
    norm_img = np.zeros((img.shape[0], img.shape[1]))
    img = cv2.normalize(img, norm_img, 0, 255, cv2.NORM_MINMAX)

    # convert to grayscale
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # denoise
    img = cv2.fastNlMeansDenoising(img, None, 10, 7, 21)

    # invert so the ink becomes the foreground
    img = cv2.bitwise_not(img)

    # threshold the image, setting all foreground pixels to
    # 255 and all background pixels to 0
    img = cv2.threshold(img, 0, 255,
                        cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # grab the (x, y) coordinates of all pixel values that
    # are greater than zero, then use these coordinates to
    # compute a rotated bounding box that contains all
    # coordinates
    # coords = np.column_stack(np.where(img > 0))
    # angle = cv2.minAreaRect(coords)[-1]
    
    # # the `cv2.minAreaRect` function returns values in the
    # # range [-90, 0); as the rectangle rotates clockwise the
    # # returned angle trends to 0 -- in this special case we
    # # need to add 90 degrees to the angle
    # if angle < -45:
    #     angle = -(90 + angle)
    # # otherwise, just take the inverse of the angle to make
    # # it positive
    # else:
    #     angle = -angle
    
    # print("angle",angle)

    # (h, w) = img.shape[:2]
    # center = (w // 2, h // 2)
    # M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # img = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC,borderMode=cv2.BORDER_REPLICATE)

    

    # cv2.imshow("norm",img)
    # cv2.waitKey(0)

    # closing all open windows
    #cv2.destroyAllWindows()
    # encode the processed image (not the raw capture) as a base64 JPEG
    pil_image = PILImage.fromarray(img)
    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

# Getting the base64 string
# base64_image = encode_image("/Users/sashrika/Project_Workspace/Contests/notekit/datasets/batch-1/images/IMG_2925.JPG")
if __name__ == "__main__":
    # quick standalone test; guarded so it does not run when app.py imports this module
    base64_image = preprocess_image("/home/sashrika/iamge.jpg")


def get_extracted_text_from_image(base64_image):
  response = client.chat.completions.create(
    model="gemini-2.0-flash",
    messages=[
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": "I am an OCR expert and my task is to carfully extract all textual content, including handwritten elements. Be very specific and do not make up any words or characters.",
          },
          {
            "type": "image_url",
            "image_url": {
              "url":  f"data:image/jpeg;base64,{base64_image}"
            },
          },
        ],
      }
    ],
  )
  extracted_text=response.choices[0].message.content
  print(extracted_text)
  return extracted_text

def get_title(extracted_text):
  response = client.chat.completions.create(
    model="gemini-2.0-flash",
    messages=[
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": f"""
            With the given information below, give me one good, concise title describing the notes.
            ###
            {extracted_text}
            ###
            """
          },
        
        ],
      }
    ],
  )
  title=response.choices[0].message.content
  print(title)
  return title

def get_flashcard_from_text_summary(extracted_text):
  response = client.chat.completions.create(
    model="gemini-2.0-flash",
    messages=[
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": f"""
            With the given information below, create flashcards that include the important details, including the description/importance.
            ###
            {extracted_text}
            ###
            """
          },
        
        ],
      }
    ],
  )
  summary=response.choices[0].message.content
  print(summary)
  return summary

def get_quiz_from_text_summary(extracted_text):
  response = client.chat.completions.create(
    model="gemini-2.0-flash",
    messages=[
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": f"""
            With the given text below, create an interactive quiz with 3 multiple-choice questions (with 3 options each), 3 open-ended questions, and 3 true/false questions.
            ###
            {extracted_text}
            ###
            """
          },
          
        ],
      }
    ],
  )
  summary2=response.choices[0].message.content
  print(summary2)
  return summary2
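
Taken together, the flow from a photo of notes to study material is a straight chain of these helpers. A minimal usage sketch (the image path is only a placeholder):

# Sketch: run the full ocr.py pipeline on one photo of notes
base64_image = preprocess_image("path/to/notes.jpg")  # placeholder path
extracted_text = get_extracted_text_from_image(base64_image)
title = get_title(extracted_text)
flashcards = get_flashcard_from_text_summary(extracted_text)
quiz = get_quiz_from_text_summary(extracted_text)
print(title, flashcards, quiz, sep="\n\n")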

app.py

Python
import streamlit as st
import cv2
import numpy as np
import os
import io
from ocr import preprocess_image, get_extracted_text_from_image, get_flashcard_from_text_summary, get_quiz_from_text_summary, get_title
import sqlite3


conn = sqlite3.connect("data.db")
c = conn.cursor()
c.execute("""CREATE TABLE if not exists notes (
                        quiz text not null,
                        flash_cards text not null,
                        title text not null  
                    );""")

col1, col2 = st.columns([2, 2])

with col1:
    enable = st.checkbox("Enable camera")
    picture = st.camera_input("Take a picture", disabled=not enable)
    


if picture:
    with col1:
        if st.button(":material/search: Look at image:"):
            st.image(picture)
    bytes_data = picture.getvalue()
    cv2_img = cv2.imdecode(np.frombuffer(bytes_data, np.uint8), cv2.IMREAD_COLOR)
    os.makedirs('images', exist_ok=True)
    cv2.imwrite('images/cv2_img.jpg', cv2_img)
    base64_image=preprocess_image('images/cv2_img.jpg')
    extracted_text=get_extracted_text_from_image(base64_image)
    title=get_title(extracted_text)
    col2.write(title)
    flash_card=get_flashcard_from_text_summary(extracted_text)
    col2.write(flash_card)
    quiz=get_quiz_from_text_summary(extracted_text)
    col2.write(quiz)
    with conn:
        c.execute("insert into notes values (:quiz,:flash_cards, :title)", {'quiz':quiz ,'flash_cards':flash_card, 'title': title})
        

    with col1:
        options = ["History", "English", "Science"]
        selection = st.pills("Subjects", options, selection_mode="single")
        st.markdown(f"Your selected subject: {selection}.")

streamlit.py

Python
import streamlit as st

pages = {
    "Apps": [
        st.Page("app.py", title="Gather info"),
        st.Page("trial.py", title="Flashcards"),
    ],
}

pg = st.navigation(pages)
pg.run()
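
Everything is launched from this entry point with Streamlit's CLI; st.navigation then lists both pages in the sidebar and runs whichever one is selected:

streamlit run streamlit.py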

topic-flashcards.py

Python
import streamlit as st
import sqlite3

col1, col2 = st.columns([2, 2])

col1.write("All flashcards stored here:")

conn = sqlite3.connect("data.db")
c = conn.cursor()

with conn:
    c.execute("select rowid, * from notes")
    notes=c.fetchall()
    print(f"No of notes {len(notes)}")


topic_flashcards = ["History", "English", "Science"]

add_selectbox = st.sidebar.selectbox("Topic Flashcards",(topic_flashcards))

# for element in topic_flashcards:
#     if element == "English":
#         # col1.write(f"{element} is the topic of these flashcards")
#         with st.expander("Quiz"):
#             st.write('''
#             Pspsps quizzes
#         ''')



for note in notes:
    with st.expander(f"#{note[0]}: {note[3]}"):
        # if st.button("Delete"):
        #     delete_note(rowid)
        st.write(f'''
        {note[2]}
    ''')

# def delete_note(rowid):
#     with conn:
#         c.execute("delete from notes where rowid=?",(rowid))

camera.py

Python
import cv2
import numpy as np
from picamera2 import Picamera2, Preview
import time
# capture a single still from the Pi camera and save it as test.jpg
picam2 = Picamera2()
picam2.start_and_capture_file("test.jpg")
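
On the Raspberry Pi, the captured photo can be fed straight into the same pipeline; a minimal sketch, assuming camera.py sits in the same folder as ocr.py:

from picamera2 import Picamera2
from ocr import preprocess_image, get_extracted_text_from_image, get_flashcard_from_text_summary

# capture a still from the Pi camera, then OCR it and build flashcards
picam2 = Picamera2()
picam2.start_and_capture_file("test.jpg")
text = get_extracted_text_from_image(preprocess_image("test.jpg"))
print(get_flashcard_from_text_summary(text))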

Credits

Sashrika Das
I am a 9th grade student, passionate about technology.
