How to encrypt text with a password in python?
Based on zwer's answers but shows an example attempt to deal with the case where the source text is exactly a multiple of 16 (AES.block_size
). However @zwer explains in a comment how this code will BREAK THE ENCRYPTION of your text by not padding your source text appropriately, making your pipeline insecure.
Code:
from builtins import bytes
import base64
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto import Random
def encrypt(string, password):
"""
It returns an encrypted string which can be decrypted just by the
password.
"""
key = password_to_key(password)
IV = make_initialization_vector()
encryptor = AES.new(key, AES.MODE_CBC, IV)
# store the IV at the beginning and encrypt
return IV + encryptor.encrypt(pad_string(string))
def decrypt(string, password):
key = password_to_key(password)
# extract the IV from the beginning
IV = string[:AES.block_size]
decryptor = AES.new(key, AES.MODE_CBC, IV)
string = decryptor.decrypt(string[AES.block_size:])
return unpad_string(string)
def password_to_key(password):
"""
Use SHA-256 over our password to get a proper-sized AES key.
This hashes our password into a 256 bit string.
"""
return SHA256.new(password).digest()
def make_initialization_vector():
"""
An initialization vector (IV) is a fixed-size input to a cryptographic
primitive that is typically required to be random or pseudorandom.
Randomization is crucial for encryption schemes to achieve semantic
security, a property whereby repeated usage of the scheme under the
same key does not allow an attacker to infer relationships
between segments of the encrypted message.
"""
return Random.new().read(AES.block_size)
def pad_string(string, chunk_size=AES.block_size):
"""
Pad string the peculirarity that uses the first byte
is used to store how much padding is applied
"""
assert chunk_size <= 256, 'We are using one byte to represent padding'
to_pad = (chunk_size - (len(string) + 1)) % chunk_size
return bytes([to_pad]) + string + bytes([0] * to_pad)
def unpad_string(string):
to_pad = string[0]
return string[1:-to_pad]
def encode(string):
"""
Base64 encoding schemes are commonly used when there is a need to encode
binary data that needs be stored and transferred over media that are
designed to deal with textual data.
This is to ensure that the data remains intact without
modification during transport.
"""
return base64.b64encode(string).decode("latin-1")
def decode(string):
return base64.b64decode(string.encode("latin-1"))
Tests:
def random_text(length):
def rand_lower():
return chr(randint(ord('a'), ord('z')))
string = ''.join([rand_lower() for _ in range(length)])
return bytes(string, encoding='utf-8')
def test_encoding():
string = random_text(100)
assert encode(string) != string
assert decode(encode(string)) == string
def test_padding():
assert len(pad_string(random_text(14))) == 16
assert len(pad_string(random_text(15))) == 16
assert len(pad_string(random_text(16))) == 32
def test_encryption():
string = random_text(100)
password = random_text(20)
assert encrypt(string, password) != string
assert decrypt(encrypt(string, password), password) == string
- If you are going to use mentioned database to authorise users, you should use hashes or message digests of user's passwords, instead of 2 way encryption algorithms, that would make your data hard to use even in case of db leakage.
- You cannot use above method to protect data that needs to be decrypted at some point, but even then you can use more secure way than just encrypting user passwords using some fixed key (which is the worst method). Take a look at OWASP's Password Storage Cheat Sheet.
As you wrote "I want to be able to encrypt/decrypt the message", I'm attaching a simple python source (tested under 2.7) for encr/decr using Blowfish.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import os
from Crypto.Cipher import Blowfish # pip install pycrypto
BS = 8
pad = lambda s: s + (BS - len(s) % BS) * chr(BS - len(s) % BS)
unpad = lambda s : s[0:-ord(s[-1])]
def doEncrypt(phrase, key):
c1 = Blowfish.new(key, Blowfish.MODE_ECB)
return c1.encrypt(pad(phrase))
def doDecrypt(phrase, key):
c1 = Blowfish.new(key, Blowfish.MODE_ECB)
return unpad(c1.decrypt(phrase))
def testing123(phrase, key):
encrypted = doEncrypt(phrase, key)
decrypted = doDecrypt(encrypted, key)
assert phrase == decrypted, "Blowfish ECB enc/dec verification failed"
print ("Blowfish ECB enc/dec verified ok")
print ('phrase/key(hex)/enc+dec: {}/{}/{}'.format(phrase, key.encode('hex'), decrypted))
if __name__== "__main__":
phrase= 'Ala ma kota, a kot ma AIDS.'
key= os.urandom(32)
testing123(phrase, key)
Here's how to do it properly in CBC mode, including PKCS#7 padding:
import base64
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto import Random
def encrypt(key, source, encode=True):
key = SHA256.new(key).digest() # use SHA-256 over our key to get a proper-sized AES key
IV = Random.new().read(AES.block_size) # generate IV
encryptor = AES.new(key, AES.MODE_CBC, IV)
padding = AES.block_size - len(source) % AES.block_size # calculate needed padding
source += bytes([padding]) * padding # Python 2.x: source += chr(padding) * padding
data = IV + encryptor.encrypt(source) # store the IV at the beginning and encrypt
return base64.b64encode(data).decode("latin-1") if encode else data
def decrypt(key, source, decode=True):
if decode:
source = base64.b64decode(source.encode("latin-1"))
key = SHA256.new(key).digest() # use SHA-256 over our key to get a proper-sized AES key
IV = source[:AES.block_size] # extract the IV from the beginning
decryptor = AES.new(key, AES.MODE_CBC, IV)
data = decryptor.decrypt(source[AES.block_size:]) # decrypt
padding = data[-1] # pick the padding value from the end; Python 2.x: ord(data[-1])
if data[-padding:] != bytes([padding]) * padding: # Python 2.x: chr(padding) * padding
raise ValueError("Invalid padding...")
return data[:-padding] # remove the padding
It's set to work with bytes
data, so if you want to encrypt strings or use string passwords make sure you encode()
them with a proper codec before passing them to the methods. If you leave the encode
parameter to True
the encrypt()
output will be base64 encoded string, and decrypt()
source should be also base64 string.
Now if you test it as:
my_password = b"secret_AES_key_string_to_encrypt/decrypt_with"
my_data = b"input_string_to_encrypt/decrypt"
print("key: {}".format(my_password))
print("data: {}".format(my_data))
encrypted = encrypt(my_password, my_data)
print("\nenc: {}".format(encrypted))
decrypted = decrypt(my_password, encrypted)
print("dec: {}".format(decrypted))
print("\ndata match: {}".format(my_data == decrypted))
print("\nSecond round....")
encrypted = encrypt(my_password, my_data)
print("\nenc: {}".format(encrypted))
decrypted = decrypt(my_password, encrypted)
print("dec: {}".format(decrypted))
print("\ndata match: {}".format(my_data == decrypted))
your output would be similar to:
key: b'secret_AES_key_string_to_encrypt/decrypt_with'
data: b'input_string_to_encrypt/decrypt'
enc: 7roSO+P/4eYdyhCbZmraVfc305g5P8VhDBOUDGrXmHw8h5ISsS3aPTGfsTSqn9f5
dec: b'input_string_to_encrypt/decrypt'
data match: True
Second round....
enc: BQm8FeoPx1H+bztlZJYZH9foI+IKAorCXRsMjbiYQkqLWbGU3NU50OsR+L9Nuqm6
dec: b'input_string_to_encrypt/decrypt'
data match: True
Proving that same key and same data still produce different ciphertext each time.
Now, this is much better than ECB but... if you're going to use this for communication - don't! This is more to explain how it should be constructed, not really to be used in a production environment and especially not for communication as its missing a crucial ingredient - message authentication. Feel free to play with it, but you should not roll your own crypto, there are well vetted protocols that will help you avoid the common pitfalls and you should use those.