#Pandas
#We use the pd.DataFrame and its methods
import pandas as pd
#Arelle
#... but this won't work
#... and if you execute it, you risk tainting other lines
#from arelle.Arelle.arelle import CntlrWinMain
#Instead change the current directory so PythonUtil can be found at './arelle/PythonUtil.py'
import os
print(os.getcwd())
#In my case... (adjust for your case)
os.chdir('C:/Anaconda3/Lib/arelle/Arelle')
print(os.getcwd())
#Make sure this returns True
#If it doesn't, repeat the above line of code (os.chdir)
dir_name = 'arelle' if 'arelle' in os.listdir('.') else 'Arelle'
'PythonUtil.py' in os.listdir(dir_name) and 'Cntlr.py' in os.listdir(dir_name)
from arelle import Cntlr
http://www.sec.gov/cgi-bin/browse-edgar?CIK=oflx&Find=Search&owner=exclude&action=getcompany
Using Nick Kraakma's API for filings, I can easily retrieve the last 5 filings for OFLX:
from urllib.request import urlopen
#https://docs.python.org/3.0/library/urllib.request.html
from lxml import etree
def get_latest_filings(ticker,how_many=5):
#Format a URL with a ticker passed into function
url = 'https://valuespreadsheet.com/iedgar/results.php?stock={}&output=xml'.format(ticker)
#Parse it using the lxml elementTree API
et = etree.fromstring(urlopen(url).read())
#Return the instanceUrl for each filing the list
return [_.find('instanceUrl').text for _ in et.find('filings')[:min(how_many,len(et.find('filings')))]]
ticker = 'OFLX'
oflx_xbrl_filings_past_4_quarters = get_latest_filings(ticker)
oflx_xbrl_filings_past_4_quarters
oflx_xbrl_filing_last_q = oflx_xbrl_filings_past_4_quarters[0]
arelle_oflx_last_q = Cntlr.Cntlr().modelManager.load(oflx_xbrl_filing_last_q)
arelle_oflx_last_q.facts[:5]
panda_oflx_last_q=pd.DataFrame(data=[(fact,
fact.value,
fact.concept.qname,
fact.context.hasSegment,
fact.context.endDatetime)
for fact in arelle_oflx_last_q.facts],
columns=('Fact', #Fact; We leave the fact in the table in case we want more out of it
'Value', #Value, like 5,200,000 in "5.2m"
'Account', #Account, like "Cash" in "Cash of 5.2m"
'Category', #Category, like "for the parent company" in "Cash of 5.2m for the parent company"b
'Time')) #Time, like 2015/12/31 in "Cash of 5.2m for the parent company as of 2015"
panda_oflx_last_q.head()
#panda_oflx_last_q.Time.dtype
panda_oflx_last_q.Account = panda_oflx_last_q.Account.astype(str)
panda_oflx_last_q.Time = pd.to_datetime(panda_oflx_last_q.Time)
panda_oflx_last_q.head()
panda_oflx_last_q.index = panda_oflx_last_q.Time
panda_oflx_last_q.head()
panda_oflx_last_q_filtered = panda_oflx_last_q[(panda_oflx_last_q.Fact.map(lambda f: f.isNumeric and not f.isNil) & #Fact is Numeric (i.e. can be converted to a number)
panda_oflx_last_q.Account.map(lambda a: a=='us-gaap:Cash') & #Just Cash; Account mentions "Cash", anywhere would give us too many values
panda_oflx_last_q.Category.map(lambda c:not c))] # Could also use "~", which does boolean "not" on the entire column
panda_oflx_last_q_filtered
panda_oflx_last_q_filtered.Value = panda_oflx_last_q_filtered.Value.astype(float)
%matplotlib inline
panda_oflx_last_q_filtered.plot(y='Value', #Set the Value column to be plotted on the y-axis
x='Time') #The "Time" is implicitly used on the x-axis; but we can be explicit))
#If we were dealing with other types of cash
#panda_oflx_last_q_filtered.Account = panda_oflx_last_q_filtered.Account.str.slice(len('us-gaap:'),15)
#grouped_by_account = panda_oflx_last_q_filtered.pivot(index='Time', columns='Account', values='Value')
def all_steps(path_to_filing, visualize=False, cash_element='us-gaap:Cash'):
#Line 1) Load
arelle_xbrl = Cntlr.Cntlr().modelManager.load(path_to_filing)
#Line 2) Map
panda_xbrl =pd.DataFrame(data=[(fact,
fact.value,
fact.concept.qname,
fact.context.hasSegment,
fact.context.endDatetime)
for fact in arelle_xbrl.facts],
columns=('Fact', #Fact; We leave the fact in the table in case we want more out of it
'Value', #Value, like 5,200,000 in "5.2m"
'Account', #Account, like "Cash" in "Cash of 5.2m"
'Category', #Category, like "for the parent company" in "Cash of 5.2m for the parent company"b
'Time'))
#Line 3) Reduce
panda_xbrl_filtered = panda_xbrl[(panda_xbrl.Fact.map(lambda f: f.isNumeric and not f.isNil) & #Fact is Numeric (i.e. can be converted to a number)
panda_xbrl.Account.map(lambda a: str(a)==cash_element) & #Just Cash; Account mentions "Cash", anywhere would give us too many values
panda_xbrl.Category.map(lambda c:not c))] # Could also use "~", which does boolean "not" on the entire column
#Line 4) Transform
panda_xbrl_filtered.Time = pd.to_datetime(panda_xbrl_filtered.Time);
panda_xbrl_filtered.Value = panda_xbrl_filtered.Value.astype(float)
#Line 5) Visualize
if visualize:
panda_xbrl_filtered.plot(x='Time', y='Value')
return panda_xbrl_filtered[['Value','Time']]
all_steps(oflx_xbrl_filings_past_4_quarters[0], True)
all_steps(oflx_xbrl_filings_past_4_quarters[1], True)
all_steps(oflx_xbrl_filings_past_4_quarters[2], True)
all_steps(oflx_xbrl_filings_past_4_quarters[3], True)
all_quarters = [all_steps(quarterly_filing) for quarterly_filing in oflx_xbrl_filings_past_4_quarters]
def join_all_quarters(all_quarters):
seed_quarter = all_quarters[0]
for quarter in all_quarters[1:]:
seed_quarter = seed_quarter.append(quarter)
return seed_quarter
seed_quarter = join_all_quarters(all_quarters)
seed_quarter.plot(x='Time', y='Value')
co_filing_list=[('FTNT (FORTINET)',['http://www.sec.gov/Archives/edgar/data/1262039/000126203914000039/ftnt-20140930.xml',
'http://www.sec.gov/Archives/edgar/data/1262039/000126203914000033/ftnt-20140630.xml',
'http://www.sec.gov/Archives/edgar/data/1262039/000126203914000025/ftnt-20140331.xml',
'http://www.sec.gov/Archives/edgar/data/1262039/000126203914000007/ftnt-20131231.xml'
]),
('CARB (Carbonite)',['http://www.sec.gov/Archives/edgar/data/1340127/000119312514401338/carb-20140930.xml',
'http://www.sec.gov/Archives/edgar/data/1340127/000119312514295793/carb-20140630.xml',
'http://www.sec.gov/Archives/edgar/data/1340127/000119312514185139/carb-20140331.xml',
'http://www.sec.gov/Archives/edgar/data/1340127/000119312514084500/carb-20131231.xml']),
('PLPM (Planet Payment)',['http://www.sec.gov/Archives/edgar/data/1362925/000110465914077800/plpm-20140930.xml',
'http://www.sec.gov/Archives/edgar/data/1362925/000110465914059427/plpm-20140630.xml',
'http://www.sec.gov/Archives/edgar/data/1362925/000110465914037677/plpm-20140331.xml',
'http://www.sec.gov/Archives/edgar/data/1362925/000104746914001920/plpm-20131231.xml'
])
]
[(print(co), join_all_quarters([all_steps(q, cash_element='us-gaap:CashAndCashEquivalentsAtCarryingValue')
for q in co_qs]).plot(x='Time', y='Value'))
for co, co_qs in co_filing_list]