retriever.engines package

Submodules

retriever.engines.csvengine module

class retriever.engines.csvengine.DummyConnection

Bases: object

close()
commit()
cursor()
rollback()
class retriever.engines.csvengine.DummyCursor

Bases: retriever.engines.csvengine.DummyConnection

class retriever.engines.csvengine.engine

Bases: retriever.lib.engine.Engine

Engine instance for writing data to a CSV file.

abbreviation = 'csv'
create_db()

Override create_db since there is no database, just a CSV file.

create_table()

Create the table by creating an empty csv file

datatypes = {'decimal': 'REAL', 'bigint': 'INTEGER', 'int': 'INTEGER', 'bool': 'INTEGER', 'auto': 'INTEGER', 'double': 'REAL', 'char': 'TEXT'}
disconnect()

Close the last file in the dataset

execute(statement, commit=True)

Write a line to the output file

format_insert_value(value, datatype)

Formats a value for an insert statement

get_connection()

Gets the db connection.

insert_statement(values)

Returns a comma delimited row of values

name = 'CSV'
required_opts = [('table_name', 'Format of table name', './{db}_{table}.csv')]
table_exists(db_name, table_name)

Check to see if the data file currently exists

table_names = []
to_csv()

Export sorted version of CSV file

retriever.engines.download_only module

class retriever.engines.download_only.DummyConnection

Bases: object

close()
commit()
cursor()
rollback()
class retriever.engines.download_only.DummyCursor

Bases: retriever.engines.download_only.DummyConnection

retriever.engines.download_only.dummy_method(self, *args, **kwargs)
class retriever.engines.download_only.engine

Bases: retriever.lib.engine.Engine

Engine instance for downloading raw data files and copying them to a directory.

abbreviation = 'download'
auto_create_table(table, url=None, filename=None, pk=None)

Download the file if it doesn’t exist

create_db(*args, **kwargs)
final_cleanup()

Copies downloaded files to desired directory

Copies the downloaded files into the chosen directory unless files with the same name already exist in the directory.

find_file(filename)

Checks for the given file and adds it to the list of all files

get_connection()

Gets the db connection.

insert_data_from_file(*args, **kwargs)
insert_data_from_url(url)

Insert data from a web resource

name = 'Download Only'
register_files(filenames)

Identify a list of files to be moved by the download

When downloading archives with multiple files the engine needs to be informed of all of the file names so that it can move them.

required_opts = [('path', 'File path to copy data files', './'), ('subdir', 'Keep the subdirectories for archived files', False)]
table_exists(dbname, tablename)

Checks if the file to be downloaded already exists

retriever.engines.jsonengine module

Engine for writing data to a JSON file

class retriever.engines.jsonengine.DummyConnection

Bases: object

close()
commit()
cursor()
rollback()
class retriever.engines.jsonengine.DummyCursor

Bases: retriever.engines.jsonengine.DummyConnection

class retriever.engines.jsonengine.engine

Bases: retriever.lib.engine.Engine

Engine instance for writing data to a JSON file.

abbreviation = 'json'
create_db()

Override create_db since there is no database, just a JSON file.

create_table()

Create the table by creating an empty json file

datatypes = {'decimal': 'REAL', 'bigint': 'INTEGER', 'int': 'INTEGER', 'bool': 'INTEGER', 'auto': 'INTEGER', 'double': 'REAL', 'char': 'TEXT'}
disconnect()

Close out the JSON with a "\n]}" and close the file.

Close all the file objects that have been created. Re-write the files, stripping off the last comma, and then close with a "\n]}".

execute(statement, commit=True)

Write a line to the output file

format_insert_value(value, datatype)

Formats a value for an insert statement

get_connection()

Gets the db connection.

insert_statement(values)
name = 'JSON'
required_opts = [('table_name', 'Format of table name', './{db}_{table}.json')]
table_exists(dbname, tablename)

Check to see if the data file currently exists

table_names = []
to_csv()

Export table from json engine to CSV file

retriever.engines.msaccess module

class retriever.engines.msaccess.engine

Bases: retriever.lib.engine.Engine

Engine instance for Microsoft Access.

abbreviation = 'msaccess'
convert_data_type(datatype)

MS Access can’t handle complex Decimal types

create_db()

MS Access doesn’t create databases.

datatypes = {'decimal': 'NUMERIC', 'bigint': 'INTEGER', 'int': 'INTEGER', 'bool': 'BIT', 'auto': 'AUTOINCREMENT', 'double': 'NUMERIC', 'char': 'VARCHAR'}
drop_statement(objecttype, objectname)

Returns a drop table or database SQL statement.

escape_single_quotes(value)

Escapes the single quotes in the value

get_connection()

Gets the db connection.

insert_data_from_file(filename)

Perform a bulk insert.

instructions = 'Create a database in Microsoft Access, close Access, then \nselect your database file using this dialog.'
name = 'Microsoft Access'
required_opts = [('file', 'Enter the filename of your Access database', './access.mdb', 'Access databases (*.mdb, *.accdb)|*.mdb;*.accdb'), ('table_name', 'Format of table name', '[{db} {table}]')]
table_exists(dbname, tablename)

Determine if the table already exists in the database

retriever.engines.mysql module

class retriever.engines.mysql.engine

Bases: retriever.lib.engine.Engine

Engine instance for MySQL.

abbreviation = 'mysql'
create_db_statement()

Returns a SQL statement to create a database.

datatypes = {'decimal': 'DECIMAL', 'bigint': 'BIGINT', 'int': 'INT', 'bool': 'BOOL', 'auto': 'INT(5) NOT NULL AUTO_INCREMENT', 'double': 'DOUBLE', 'char': ('TEXT', 'VARCHAR')}
get_connection()

Gets the db connection.

insert_data_from_file(filename)

Calls MySQL “LOAD DATA LOCAL INFILE” statement to perform a bulk insert.

max_int = 4294967295
name = 'MySQL'
required_opts = [('user', 'Enter your MySQL username', 'root'), ('password', 'Enter your password', ''), ('host', 'Enter your MySQL host', 'localhost'), ('port', 'Enter your MySQL port', 3306), ('database_name', 'Format of database name', '{db}'), ('table_name', 'Format of table name', '{db}.{table}')]
set_engine_encoding()

Set MySQL database encoding to match data encoding

Please update the encoding lookup table if the required encoding is not present.

table_exists(dbname, tablename)

Checks to see if the given table exists

retriever.engines.postgres module

class retriever.engines.postgres.engine

Bases: retriever.lib.engine.Engine

Engine instance for PostgreSQL.

abbreviation = 'postgres'
create_db()

Creates the database

create_db_statement()

In PostgreSQL, the equivalent of a SQL database is a schema.

create_table()

PostgreSQL needs to commit operations individually.

datatypes = {'decimal': 'decimal', 'bigint': 'bigint', 'int': 'integer', 'bool': 'boolean', 'auto': 'serial', 'double': 'double precision', 'char': 'varchar'}
drop_statement(objecttype, objectname)

In PostgreSQL, the equivalent of a SQL database is a schema.

escape_single_quotes(value)

Escapes single quotes in the value

format_insert_value(value, datatype)

Formats a value for an insert statement

get_connection()

Gets the db connection.

Please update the encoding lookup table if the required encoding is not present.

insert_data_from_file(filename)

Use PostgreSQL’s “COPY FROM” statement to perform a bulk insert.

insert_statement(values)

Returns a SQL statement to insert a set of values

max_int = 2147483647
name = 'PostgreSQL'
required_opts = [('user', 'Enter your PostgreSQL username', 'postgres'), ('password', 'Enter your password', ''), ('host', 'Enter your PostgreSQL host', 'localhost'), ('port', 'Enter your PostgreSQL port', 5432), ('database', 'Enter your PostgreSQL database name', 'postgres'), ('database_name', 'Format of schema name', '{db}'), ('table_name', 'Format of table name', '{db}.{table}')]
table_exists(dbname, tablename)

Checks to see if the given table exists

retriever.engines.sqlite module

class retriever.engines.sqlite.engine

Bases: retriever.lib.engine.Engine

Engine instance for SQLite.

abbreviation = 'sqlite'
create_db()

SQLite doesn’t create databases; each database is a file and needs a separate connection.

datatypes = {'decimal': 'REAL', 'bigint': 'INTEGER', 'int': 'INTEGER', 'bool': 'INTEGER', 'auto': ('INTEGER', 'AUTOINCREMENT'), 'double': 'REAL', 'char': 'TEXT'}
escape_single_quotes(line)

Escapes single quotes in the line

get_bulk_insert_statement()

Get insert statement for bulk inserts

This places ?’s instead of the actual values so that executemany() can operate as designed

get_connection()

Gets the db connection.

name = 'SQLite'
required_opts = [('file', 'Enter the filename of your SQLite database', './sqlite.db', ''), ('table_name', 'Format of table name', '{db}_{table}')]
table_exists(dbname, tablename)

Determine if the table already exists in the database

to_csv()

retriever.engines.xmlengine module

class retriever.engines.xmlengine.DummyConnection

Bases: object

close()
commit()
cursor()
rollback()
class retriever.engines.xmlengine.DummyCursor

Bases: retriever.engines.xmlengine.DummyConnection

class retriever.engines.xmlengine.engine

Bases: retriever.lib.engine.Engine

Engine instance for writing data to an XML file.

abbreviation = 'xml'
create_db()

Override create_db since there is no database, just an XML file.

create_table()

Create the table by creating an empty XML file

datatypes = {'decimal': 'REAL', 'bigint': 'INTEGER', 'int': 'INTEGER', 'bool': 'INTEGER', 'auto': 'INTEGER', 'double': 'REAL', 'char': 'TEXT'}
disconnect()

Close out the xml files

Close all the file objects that have been created. Re-write the files, stripping off the last comma, and then close with a closing tag.

execute(statement, commit=True)

Write a line to the output file

format_insert_value(value, datatype)

Formats a value for an insert statement

get_connection()

Gets the db connection.

insert_statement(values)
name = 'XML'
required_opts = [('table_name', 'Format of table name', './{db}_{table}.xml')]
table_exists(dbname, tablename)

Check to see if the data file currently exists

table_names = []
to_csv()

Export table from xml engine to CSV file

Module contents

Contains DBMS-specific Engine implementations.